diff --git a/.bazelrc b/.bazelrc index bdfd910d431..224238d7c0b 100644 --- a/.bazelrc +++ b/.bazelrc @@ -163,6 +163,8 @@ build:cuda_clang --action_env TF_CUDA_CLANG=1 build:dbg --config=opt -c dbg # for now, disable arm_neon. see: https://github.com/tensorflow/tensorflow/issues/33360 build:dbg --cxxopt -DTF_LITE_DISABLE_X86_NEON +# AWS SDK must be compiled in release mode. see: https://github.com/tensorflow/tensorflow/issues/37498 +build:dbg --copt -DDEBUG_BUILD build:tensorrt --action_env TF_NEED_TENSORRT=1 @@ -356,9 +358,10 @@ build:rbe_linux --linkopt=-lm build:rbe_cpu_linux --config=rbe_linux build:rbe_cpu_linux --crosstool_top="//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:toolchain" build:rbe_cpu_linux --extra_toolchains="//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:cc-toolchain-k8" -build:rbe_cpu_linux --extra_execution_platforms"=@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010" -build:rbe_cpu_linux --host_platform="@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010" -build:rbe_cpu_linux --platforms="@org_tensorflow//third_party/toolchains:rbe_ubuntu16.04-manylinux2010" +build:rbe_cpu_linux --extra_execution_platforms="@ubuntu16.04-manylinux2010-py3_config_platform//:platform" +build:rbe_cpu_linux --extra_execution_platforms="@ubuntu16.04-manylinux2010-py3_config_platform//:platform" +build:rbe_cpu_linux --host_platform="@ubuntu16.04-manylinux2010-py3_config_platform//:platform" +build:rbe_cpu_linux --platforms="@ubuntu16.04-manylinux2010-py3_config_platform//:platform" build:rbe_linux_cuda_base --config=rbe_linux build:rbe_linux_cuda_base --repo_env=TF_NEED_TENSORRT=1 diff --git a/README.md b/README.md index 27032043e07..ba4597af14c 100644 --- a/README.md +++ b/README.md @@ -103,17 +103,17 @@ open-source software development: ### Official Builds -Build Type | Status | Artifacts ------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- -**Linux CPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.html) | [PyPI](https://pypi.org/project/tf-nightly/) -**Linux GPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-py3.html) | [PyPI](https://pypi.org/project/tf-nightly-gpu/) -**Linux XLA** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-xla.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-xla.html) | TBA -**macOS** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.html) | [PyPI](https://pypi.org/project/tf-nightly/) -**Windows CPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.html) | [PyPI](https://pypi.org/project/tf-nightly/) -**Windows GPU** | 
[![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.html) | [PyPI](https://pypi.org/project/tf-nightly-gpu/) -**Android** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.html) | [![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg)](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) -**Raspberry Pi 0 and 1** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py2.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py2.html) [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv6l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv6l.whl) -**Raspberry Pi 2 and 3** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py2.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py2.html) [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv7l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv7l.whl) +Build Type | Status | Artifacts +------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**Linux CPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.html) | [PyPI](https://pypi.org/project/tf-nightly/) +**Linux GPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-py3.html) | [PyPI](https://pypi.org/project/tf-nightly-gpu/) +**Linux XLA** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-xla.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-xla.html) | TBA +**macOS** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.html) | [PyPI](https://pypi.org/project/tf-nightly/) +**Windows CPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.html) | [PyPI](https://pypi.org/project/tf-nightly/) +**Windows GPU** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.html) | [PyPI](https://pypi.org/project/tf-nightly-gpu/) +**Android** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.html) | 
[![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg)](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) +**Raspberry Pi 0 and 1** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.html) | [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv6l.whl) +**Raspberry Pi 2 and 3** | [![Status](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.svg)](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.html) | [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv7l.whl) ### Community Supported Builds diff --git a/RELEASE.md b/RELEASE.md index b5d088821e4..6c8921cf492 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,147 @@ +# Release 2.2.0 + +TensorFlow 2.2 discontinues support for Python 2, [previously announced](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ) as following [Python 2's EOL on January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). + +Coinciding with this change, new releases of [TensorFlow's Docker images](https://hub.docker.com/r/tensorflow/tensorflow/) provide Python 3 exclusively. Because all images now use Python 3, Docker tags containing `-py3` will no longer be provided and existing `-py3` tags like `latest-py3` will not be updated. + +## Major Features and Improvements + +* Replaced the scalar type for string tensors from `std::string` to `tensorflow::tstring` which is now ABI stable. +* A new Profiler for TF 2 for CPU/GPU/TPU. It offers both device and host performance analysis, including input pipeline and TF Ops. Optimization advisory is provided whenever possible. Please see [this tutorial](https://www.tensorflow.org/tensorboard/tensorboard_profiling_keras) and [guide](https://www.tensorflow.org/guide/profiler) for usage guidelines. +* Export C++ functions to Python using `pybind11` as opposed to `SWIG` as a part of our [deprecation of swig efforts](https://github.com/tensorflow/community/blob/master/rfcs/20190208-pybind11.md). +* `tf.distribute`: + * Support added for global sync `BatchNormalization` by using the newly added `tf.keras.layers.experimental.SyncBatchNormalization` layer. This layer will sync `BatchNormalization` statistics every step across all replicas taking part in sync training. + * Performance improvements for GPU multi-worker distributed training using `tf.distribute.experimental.MultiWorkerMirroredStrategy` + * Update NVIDIA `NCCL` to `2.5.7-1` for better performance and performance tuning. Please see [nccl developer guide](https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/env.html) for more information on this. + * Support gradient `allreduce` in `float16`. See this [example](https://github.com/tensorflow/models/blob/master/official/staging/training/grad_utils.py) usage. + * Experimental support of [all reduce gradient packing](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CollectiveHints) to allow overlapping gradient aggregation with backward path computation. + * Deprecated `experimental_run_v2` method for distribution strategies and renamed the method `run` as it is no longer experimental. + * Add CompositeTensor support for DistributedIterators. This should help prevent unnecessary function retracing and memory leaks. 
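+
+For illustration, a minimal sketch of the sync batch normalization layer described above under a multi-worker strategy (the model shape, optimizer, and loss are arbitrary choices for the example, not part of the change itself):
+
+```python
+import tensorflow as tf
+
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+with strategy.scope():
+  # SyncBatchNormalization aggregates batch statistics across all replicas
+  # taking part in sync training, instead of using per-replica statistics.
+  model = tf.keras.Sequential([
+      tf.keras.layers.Dense(64, activation="relu"),
+      tf.keras.layers.experimental.SyncBatchNormalization(),
+      tf.keras.layers.Dense(10),
+  ])
+  model.compile(
+      optimizer="adam",
+      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
+```
+
+Under the same scope, per-replica computations now go through `strategy.run(...)`, the non-experimental name for the deprecated `experimental_run_v2`.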
+* `tf.keras`: + * `Model.fit` major improvements: + * You can now use custom training logic with `Model.fit` by overriding `Model.train_step`. + * Easily write state-of-the-art training loops without worrying about all of the features `Model.fit` handles for you (distribution strategies, callbacks, data formats, looping logic, etc) + * See the default [`Model.train_step`](https://github.com/tensorflow/tensorflow/blob/1381fc8e15e22402417b98e3881dfd409998daea/tensorflow/python/keras/engine/training.py#L540) for an example of what this function should look like. Same applies for validation and inference via `Model.test_step` and `Model.predict_step`. + * SavedModel uses its own `Model._saved_model_inputs_spec` attr now instead of + relying on `Model.inputs` and `Model.input_names`, which are no longer set for subclass Models. + This attr is set in eager, `tf.function`, and graph modes. This gets rid of the need for users to + manually call `Model._set_inputs` when using Custom Training Loops(CTLs). + * Dynamic shapes are supported for generators by calling the Model on the first batch we "peek" from the generator. + This used to happen implicitly in `Model._standardize_user_data`. Long-term, a solution where the + `DataAdapter` doesn't need to call the Model is probably preferable. + * The SavedModel format now supports all Keras built-in layers (including metrics, preprocessing layers, and stateful RNN layers) + * Update Keras batch normalization layer to use the running mean and average computation in the `fused_batch_norm`. You should see significant performance improvements when using `fused_batch_norm` in Eager mode. + +* `tf.lite`: + * Enable TFLite experimental new converter by default. +* XLA + * XLA now builds and works on windows. All prebuilt packages come with XLA available. + * XLA can be [enabled for a `tf.function`](https://www.tensorflow.org/xla#explicit_compilation_with_tffunction +) with “compile or throw exception” semantics on CPU and GPU. + +## Breaking Changes +* `tf.keras`: + * In `tf.keras.applications` the name of the "top" layer has been standardized to "predictions". This is only a problem if your code relies on the exact name of the layer. + * Huber loss function has been updated to be consistent with other Keras losses. It now computes mean over the last axis of per-sample losses before applying the reduction function. +* AutoGraph no longer converts functions passed to `tf.py_function`, `tf.py_func` and `tf.numpy_function`. +* Deprecating `XLA_CPU` and `XLA_GPU` devices with this release. +* Increasing the minimum bazel version to build TF to 2.0.0 to use Bazel's `cc_experimental_shared_library`. +* Keras compile/fit behavior for functional and subclassed models have been unified. Model properties such as `metrics`, `metrics_names` will now be available only after **training/evaluating the model on actual data** for functional models. `metrics` will **now include** model `loss` and output losses.`loss_functions` property has been removed from the model. This was an undocumented property that was accidentally public and has now been removed. + +## Known Caveats +* The current TensorFlow release now **requires** [gast](https://pypi.org/project/gast/) version 0.3.3. + +## Bug Fixes and Other Changes +* `tf.data`: + * Removed `autotune_algorithm` from experimental optimization options. +* TF Core: + * `tf.constant` always creates CPU tensors irrespective of the current device context. 
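+
+As a minimal sketch of the `Model.train_step` override described under `tf.keras` above (the loss and metric handling below mirror the default behavior but are illustrative only):
+
+```python
+import tensorflow as tf
+
+class CustomModel(tf.keras.Model):
+  def train_step(self, data):
+    x, y = data
+    with tf.GradientTape() as tape:
+      y_pred = self(x, training=True)
+      # compiled_loss applies the loss configured in compile(), plus any
+      # regularization losses attached to the model.
+      loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
+    gradients = tape.gradient(loss, self.trainable_variables)
+    self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
+    self.compiled_metrics.update_state(y, y_pred)
+    return {m.name: m.result() for m in self.metrics}
+```
+
+A model defined this way is still driven by the usual `model.compile(...)` and `model.fit(dataset)` calls; only the per-batch logic changes.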
+    * Eager `TensorHandles` maintain a list of mirrors for any copies to local or remote devices. This avoids any redundant copies due to op execution.
+    * For `tf.Tensor` & `tf.Variable`, `.experimental_ref()` is no longer experimental and is available as simply `.ref()`.
+    * `pfor/vectorized_map`: Added support for vectorizing 56 more ops. Vectorizing `tf.cond` is also supported now.
+    * Set as much partial shape as we can infer statically within the gradient impl of the gather op.
+    * Gradient of `tf.while_loop` emits `StatelessWhile` op if `cond` and body functions are stateless. This allows multiple gradient while ops to run in parallel under distribution strategy.
+    * Speed up `GradientTape` in eager mode by auto-generating list of op inputs/outputs which are unused and hence not cached for gradient functions.
+    * Support `back_prop=False` in `while_v2` but mark it as deprecated.
+    * Improve error message when attempting to use `None` in data-dependent control flow.
+    * Add `RaggedTensor.numpy()`.
+    * Update `RaggedTensor.__getitem__` to preserve uniform dimensions & allow indexing into uniform dimensions.
+    * Update `tf.expand_dims` to always insert the new dimension as a non-ragged dimension.
+    * Update `tf.embedding_lookup` to use `partition_strategy` and `max_norm` when `ids` is ragged.
+    * Allow `batch_dims==rank(indices)` in `tf.gather`.
+    * Add support for bfloat16 in `tf.print`.
+* `tf.distribute`:
+    * Support `embedding_column` with variable-length input features for `MultiWorkerMirroredStrategy`.
+* `tf.keras`:
+    * Added `experimental_aggregate_gradients` argument to `tf.keras.optimizers.Optimizer.apply_gradients`. This allows custom gradient aggregation and processing of aggregated gradients in a custom training loop.
+    * Allow `pathlib.Path` paths for loading models via the Keras API.
+* `tf.function`/AutoGraph:
+    * AutoGraph is now available in `ReplicaContext.merge_call`, `Strategy.extended.update` and `Strategy.extended.update_non_slot`.
+    * Experimental support for shape invariants has been enabled in `tf.function`. See the API docs for `tf.autograph.experimental.set_loop_options` for additional info.
+    * AutoGraph error messages now exclude frames corresponding to APIs internal to AutoGraph.
+    * Improve shape inference for `tf.function` input arguments to unlock more Grappler optimizations in TensorFlow 2.x.
+    * Improve automatic control dependency management of resources by allowing resource reads to occur in parallel and synchronizing only on writes.
+    * Fix execution order of multiple stateful calls to `experimental_run_v2` in `tf.function`.
+    * You can now iterate over `RaggedTensors` using a for loop inside `tf.function`.
+* `tf.lite`:
+    * Migrated the `tf.lite` C inference API out of experimental into lite/c.
+    * Add an option to disallow `NNAPI` CPU / partial acceleration on Android 10.
+    * TFLite Android AARs now include the C headers and APIs that are required to use TFLite from native code.
+    * Refactors the delegate and delegate kernel sources to allow usage in the linter.
+    * Limit delegated ops to actually supported ones if a device name is specified or `NNAPI` CPU Fallback is disabled.
+    * TFLite now supports the `tf.math.reciprocal1` op by lowering it to the `tf.div` op.
+    * TFLite's unpack op now supports boolean tensor inputs.
+    * Microcontroller and embedded code moved from experimental to the main TensorFlow Lite folder.
+    * Check for large TFLite tensors.
+    * Fix GPU delegate crash with C++17.
+    * Add 5D support to TFLite `strided_slice`.
+    * Fix error in delegation of `DEPTH_TO_SPACE` to `NNAPI` causing the op not to be accelerated.
+    * Fix segmentation fault when running a model with LSTM nodes using `NNAPI` Delegate.
+    * Fix `NNAPI` delegate failure when an operand for Maximum/Minimum operation is a scalar.
+    * Fix `NNAPI` delegate failure when Axis input for reduce operation is a scalar.
+    * Expose option to limit the number of partitions that will be delegated to `NNAPI`.
+    * If a target accelerator is specified, use its feature level to determine operations to delegate instead of the SDK version.
+* `tf.random`:
+    * Various random number generation improvements:
+        * Add a fast path for default `random_uniform`.
+        * `random_seed` documentation improvement.
+        * `RandomBinomial` broadcasts and appends the sample shape to the left rather than the right.
+    * Added `tf.random.stateless_binomial`, `tf.random.stateless_gamma`, and `tf.random.stateless_poisson`.
+    * `tf.random.stateless_uniform` now supports unbounded sampling of `int` types.
+* Math and Linear Algebra:
+    * Add `tf.linalg.LinearOperatorTridiag`.
+    * Add `LinearOperatorBlockLowerTriangular`.
+    * Add broadcasting support to `tf.linalg.triangular_solve` [#26204](https://github.com/tensorflow/tensorflow/issues/26204) and `tf.math.invert_permutation`.
+    * Add `tf.math.sobol_sample` op.
+    * Add `tf.math.xlog1py`.
+    * Add `tf.math.special.{dawsn,expi,fresnel_cos,fresnel_sin,spence}`.
+    * Add a Modified Discrete Cosine Transform (MDCT) and its inverse to `tf.signal`.
+* TPU Enhancements:
+    * Refactor `TpuClusterResolver` to move shared logic to a separate pip package.
+    * Support configuring TPU software version from cloud tpu client.
+    * Allowed TPU embedding weight decay factor to be multiplied by learning rate.
+* XLA Support:
+    * Add standalone XLA AOT runtime target + relevant .cc sources to pip package.
+    * Add check for memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit ARM. This ensures a deterministic early exit instead of a hard-to-debug bus error later.
+    * `saved_model_cli aot_compile_cpu` allows you to compile saved models to XLA header+object files and include them in your C++ programs.
+    * Enable `Igamma`, `Igammac` for XLA.
+* Deterministic Op Functionality:
+    * XLA reduction emitter is deterministic when the environment variable `TF_DETERMINISTIC_OPS` is set to "true" or "1". This extends deterministic `tf.nn.bias_add` back-prop functionality (and therefore also deterministic back-prop of bias-addition in Keras layers) to include when XLA JIT compilation is enabled.
+    * Fix problem, when running on a CUDA GPU and when either environment variable `TF_DETERMINISTIC_OPS` or environment variable `TF_CUDNN_DETERMINISTIC` is set to "true" or "1", in which some layer configurations led to an exception with the message "No algorithm worked!"
+* Tracing and Debugging:
+    * Add source, destination name to `_send` traceme to allow easier debugging.
+    * Add traceme event to `fastpathexecute`.
+* Other:
+    * Fix an issue with `AUC.reset_states` for multi-label AUC [#35852](https://github.com/tensorflow/tensorflow/issues/35852).
+    * Fix the TF upgrade script to not delete files when there is a parsing error and the output mode is `in-place`.
+    * Move `tensorflow/core:framework/*_pyclif` rules to `tensorflow/core/framework:*_pyclif`.
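+
+As a small illustration of the stateless samplers listed under `tf.random` above (the shapes and distribution parameters here are arbitrary):
+
+```python
+import tensorflow as tf
+
+# Stateless RNG ops take an explicit [2]-element integer seed and return
+# the same values every time they are run with that seed.
+seed = tf.constant([7, 42], dtype=tf.int32)
+binomial = tf.random.stateless_binomial(shape=[5], seed=seed, counts=10., probs=0.3)
+gamma = tf.random.stateless_gamma(shape=[5], seed=seed, alpha=2.0)
+poisson = tf.random.stateless_poisson(shape=[5], seed=seed, lam=4.0)
+```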
+ +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +372046933, 8bitmp3, aaronhma, Abin Shahab, Aditya Patwardhan, Agoniii, Ahti Kitsik, Alan Yee, Albin Joy, Alex Hoffman, Alexander Grund, Alexandre E. Eichenberger, Amit Kumar Jaiswal, amoitra, Andrew Anderson, Angus-Luo, Anthony Barbier, Anton Kachatkou, Anuj Rawat, archis, Arpan-Dhatt, Arvind Sundararajan, Ashutosh Hathidara, autoih, Bairen Yi, Balint Cristian, Bas Aarts, BashirSbaiti, Basit Ayantunde, Ben Barsdell, Benjamin Gaillard, boron, Brett Koonce, Bryan Cutler, Christian Goll, Christian Sachs, Clayne Robison, comet, Daniel Falbel, Daria Zhuravleva, darsh8200, David Truby, Dayananda-V, deepakm, Denis Khalikov, Devansh Singh, Dheeraj R Reddy, Diederik Van Liere, Diego Caballero, Dominic Jack, dothinking, Douman, Drake Gens, Duncan Riach, Ehsan Toosi, ekuznetsov139, Elena Zhelezina, elzino, Ending2015a, Eric Schweitz, Erik Zettel, Ethan Saadia, Eugene Kuznetsov, Evgeniy Zheltonozhskiy, Ewout Ter Hoeven, exfalso, FAIJUL, Fangjun Kuang, Fei Hu, Frank Laub, Frederic Bastien, Fredrik Knutsson, frreiss, Frédéric Rechtenstein, fsx950223, Gaurav Singh, gbaned, George Grzegorz Pawelczak, George Sterpu, Gian Marco Iodice, Giorgio Arena, Hans Gaiser, Hans Pabst, Haoyu Wu, Harry Slatyer, hsahovic, Hugo, Hugo Sjöberg, IrinaM21, jacco, Jake Tae, Jean-Denis Lesage, Jean-Michel Gorius, Jeff Daily, Jens Elofsson, Jerry Shih, jerryyin, Jin Mingjian, Jinjing Zhou, JKIsaacLee, jojimonv, Jonathan Dekhtiar, Jose Ignacio Gomez, Joseph-Rance, Judd, Julian Gross, Kaixi Hou, Kaustubh Maske Patil, Keunwoo Choi, Kevin Hanselman, Khor Chean Wei, Kilaru Yasaswi Sri Chandra Gandhi, Koan-Sin Tan, Koki Ibukuro, Kristian Holsheimer, kurileo, Lakshay Tokas, Lee Netherton, leike666666, Leslie-Fang-Intel, Li, Guizi, LIUJIAN435, Lukas Geiger, Lyo Nguyen, madisetti, Maher Jendoubi, Mahmoud Abuzaina, Manuel Freiberger, Marcel Koester, Marco Jacopo Ferrarotti, Markus Franke, marload, Mbah-Javis, mbhuiyan, Meng Zhang, Michael Liao, MichaelKonobeev, Michal Tarnowski, Milan Straka, minoring, Mohamed Nour Abouelseoud, MoussaMM, Mrinal Jain, mrTsjolder, Måns Nilsson, Namrata Bhave, Nicholas Gao, Niels Ole Salscheider, nikochiko, Niranjan Hasabnis, Nishidha Panpaliya, nmostafa, Noah Trenaman, nuka137, Officium, Owen L - Sfe, Pallavi G, Paul Andrey, Peng Sun, Peng Wu, Phil Pearl, PhilipMay, pingsutw, Pooya Davoodi, PragmaTwice, pshiko, Qwerty71, R Gomathi, Rahul Huilgol, Richard Xiao, Rick Wierenga, Roberto Rosmaninho, ruchit2801, Rushabh Vasani, Sami, Sana Damani, Sarvesh Dubey, Sasan Jafarnejad, Sergii Khomenko, Shane Smiskol, Shaochen Shi, sharkdtu, Shawn Presser, ShengYang1, Shreyash Patodia, Shyam Sundar Dhanabalan, Siju Samuel, Somyajit Chakraborty Sam, Srihari Humbarwadi, srinivasan.narayanamoorthy, Srishti Yadav, Steph-En-M, Stephan Uphoff, Stephen Mugisha, SumanSudhir, Taehun Kim, Tamas Bela Feher, TengLu, Tetragramm, Thierry Herrmann, Tian Jin, tigertang, Tom Carchrae, Tom Forbes, Trent Lo, Victor Peng, vijayphoenix, Vincent Abriou, Vishal Bhola, Vishnuvardhan Janapati, vladbataev, VoVAllen, Wallyss Lima, Wen-Heng (Jack) Chung, wenxizhu, William D. 
Irons, William Zhang, Xiaoming (Jason) Cui, Xiaoquan Kong, Xinan Jiang, Yasir Modak, Yasuhiro Matsumoto, Yaxun (Sam) Liu, Yong Tang, Ytyt-Yt, yuan, Yuan Mingshuai, Yuan Tang, Yuki Ueda, Yusup, zhangshijin, zhuwenxi + # Release 2.0.1 ## Bug Fixes and Other Changes diff --git a/configure.py b/configure.py index ac9ed0c4d88..945c3036a8d 100644 --- a/configure.py +++ b/configure.py @@ -144,7 +144,7 @@ def write_to_bazelrc(line): def write_action_env_to_bazelrc(var_name, var): - write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) + write_to_bazelrc('build --action_env {}="{}"'.format(var_name, str(var))) def run_shell(cmd, allow_non_zero=False, stderr=None): @@ -205,7 +205,7 @@ def setup_python(environ_cp): # Get PYTHON_BIN_PATH, default is the current running python. default_python_bin_path = sys.executable ask_python_bin_path = ('Please specify the location of python. [Default is ' - '%s]: ') % default_python_bin_path + '{}]: ').format(default_python_bin_path) while True: python_bin_path = get_from_env_or_user_or_default(environ_cp, 'PYTHON_BIN_PATH', @@ -215,9 +215,10 @@ def setup_python(environ_cp): if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): break elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' % python_bin_path) + print('Invalid python path: {} cannot be found.'.format(python_bin_path)) else: - print('%s is not executable. Is it the python binary?' % python_bin_path) + print('{} is not executable. Is it the python binary?'.format( + python_bin_path)) environ_cp['PYTHON_BIN_PATH'] = '' # Convert python path to Windows style before checking lib and version @@ -236,7 +237,7 @@ def setup_python(environ_cp): default_python_lib_path = python_lib_paths[0] python_lib_path = get_input( 'Please input the desired Python library path to use. ' - 'Default is [%s]\n' % python_lib_paths[0]) + 'Default is [{}]\n'.format(python_lib_paths[0])) if not python_lib_path: python_lib_path = default_python_lib_path environ_cp['PYTHON_LIB_PATH'] = python_lib_path @@ -252,7 +253,7 @@ def setup_python(environ_cp): # Set-up env variables used by python_configure.bzl write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) + write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path)) environ_cp['PYTHON_BIN_PATH'] = python_bin_path # If choosen python_lib_path is from a path specified in the PYTHONPATH @@ -266,7 +267,7 @@ def setup_python(environ_cp): with open( os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f: - f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) + f.write('export PYTHON_BIN_PATH="{}"'.format(python_bin_path)) def reset_tf_configure_bazelrc(): @@ -320,11 +321,12 @@ def get_var(environ_cp, Raise the error to avoid infinitely looping. """ if not question: - question = 'Do you wish to build TensorFlow with %s support?' % query_item + question = 'Do you wish to build TensorFlow with {} support?'.format( + query_item) if not yes_reply: - yes_reply = '%s support will be enabled for TensorFlow.' 
% query_item + yes_reply = '{} support will be enabled for TensorFlow.'.format(query_item) if not no_reply: - no_reply = 'No %s' % yes_reply + no_reply = 'No {}'.format(yes_reply) yes_reply += '\n' no_reply += '\n' @@ -368,7 +370,7 @@ def get_var(environ_cp, print(no_reply) var = False else: - print('Invalid selection: %s' % user_input_origin) + print('Invalid selection: {}'.format(user_input_origin)) return var @@ -479,13 +481,13 @@ def check_bazel_version(min_version, max_version): if which('bazel') is None: print('Cannot find bazel. Please install bazel.') sys.exit(0) - curr_version = run_shell( - ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) - for line in curr_version.split('\n'): - if 'Build label: ' in line: - curr_version = line.split('Build label: ')[1] - break + stderr = open(os.devnull, 'wb') + curr_version = run_shell(['bazel', '--version'], + allow_non_zero = True, + stderr = stderr) + if curr_version.startswith('bazel '): + curr_version = curr_version.split('bazel ')[1] min_version_int = convert_version_to_int(min_version) curr_version_int = convert_version_to_int(curr_version) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index f2018220a56..ab4316d5ed0 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -517,6 +517,7 @@ package_group( "//perftools/accelerators/xprof/api/...", "//third_party/py/autograph/...", "//third_party/swift/tensorflow/x10/...", + "//third_party/swift/tensorflow_apis/...", "//tensorflow/...", "//tensorflow_estimator/python/estimator/...", "//tensorflow_models/official/...", @@ -529,6 +530,13 @@ package_group(name = "ndarray_tensor_allow_list") # TODO(b/154762408) Remove this package group once it's no longer needed. package_group(name = "composite_tensor_whitelist") +# Packages that use private types symbols, until they are exported. +# TODO(b/154650521) Remove. 
+package_group( + name = "types_whitelist", + packages = ["//learning/deepmind/tensorflow/replicator/..."], +) + filegroup( name = "intel_binary_blob", data = if_mkl_ml( diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 1c4c0d1e06a..05d5f9a3ed2 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -58,6 +58,7 @@ filegroup( name = "pywrap_required_hdrs", srcs = [ "c_api_internal.h", + "conversion_macros.h", "python_api.h", "tensor_interface.h", "tf_status_helper.h", @@ -84,7 +85,14 @@ tf_cuda_library( ], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + "//tensorflow:chromiumos": [ + ":tf_attrtype", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform:platform", ], "//conditions:default": [ ":tf_attrtype", @@ -174,7 +182,7 @@ tf_cuda_library( ":tf_status_internal", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ ":tf_status", @@ -211,7 +219,7 @@ tf_cuda_library( ], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs ], "//conditions:default": [ "//tensorflow/core:lib", @@ -224,12 +232,13 @@ cc_library( srcs = ["tf_status.cc"], hdrs = ["tf_status.h"], visibility = ["//visibility:public"], - deps = select({ + deps = [ + ":tf_status_internal", + ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs ], "//conditions:default": [ - ":tf_status_internal", "//tensorflow/core:lib", ], }), @@ -251,10 +260,15 @@ cc_library( name = "tensor_interface", hdrs = ["tensor_interface.h"], visibility = ["//tensorflow:internal"], - deps = [ - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - ], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs + ], + "//conditions:default": [ + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], + }), ) cc_library( @@ -264,7 +278,7 @@ cc_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs ], "//conditions:default": [ "//tensorflow/core:framework", @@ -278,16 +292,17 @@ cc_library( srcs = ["tf_tensor.cc"], hdrs = ["tf_tensor.h"], visibility = ["//visibility:public"], - deps = select({ + deps = [ + ":tensor_interface", + ":tf_datatype", + ":tf_status", + ":tf_status_helper", + ":tf_tensor_internal", + ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs ], "//conditions:default": [ - ":tensor_interface", - ":tf_datatype", - ":tf_status", - ":tf_status_helper", - ":tf_tensor_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -303,14 +318,15 @@ tf_cuda_library( "tf_tensor_internal.h", ], visibility = ["//tensorflow:internal"], - deps = select({ + deps = [ + ":tensor_interface", + ":tf_datatype", + ":tf_status", + ] + 
select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", # TODO(annarev): exclude runtime srcs ], "//conditions:default": [ - ":tensor_interface", - ":tf_datatype", - ":tf_status", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:casts", @@ -418,7 +434,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:framework", @@ -449,7 +465,7 @@ tf_cuda_library( ] + select({ "//tensorflow:android": [ ":c_api_internal", - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ ":c_api_internal", @@ -476,7 +492,7 @@ tf_cuda_library( ":tf_status_helper", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:framework", @@ -532,6 +548,7 @@ tf_cuda_cc_test( "//conditions:default": [], }), tags = [ + "no_windows", # TODO(b/155444728) "noasan", ], # We must ensure that the dependencies can be dynamically linked since diff --git a/tensorflow/c/conversion_macros.h b/tensorflow/c/conversion_macros.h index ce8adfadb26..d1f99b7b5b0 100644 --- a/tensorflow/c/conversion_macros.h +++ b/tensorflow/c/conversion_macros.h @@ -16,15 +16,18 @@ limitations under the License. #ifndef TENSORFLOW_C_CONVERSION_MACROS_H_ #define TENSORFLOW_C_CONVERSION_MACROS_H_ -#define DEFINE_CONVERSION_FUNCTIONS(cpp_impl, wrapper) \ - inline cpp_impl *unwrap(wrapper *w) { \ - return reinterpret_cast(w); \ - } \ - \ - inline const cpp_impl *unwrap(const wrapper *w) { \ - return reinterpret_cast(w); \ - } \ - \ - inline wrapper *wrap(cpp_impl *i) { return reinterpret_cast(i); } +#define DEFINE_CONVERSION_FUNCTIONS(cpp_impl, wrapper) \ + inline cpp_impl *unwrap(wrapper *w) { \ + return reinterpret_cast(w); \ + } \ + \ + inline const cpp_impl *unwrap(const wrapper *w) { \ + return reinterpret_cast(w); \ + } \ + \ + inline wrapper *wrap(cpp_impl *i) { return reinterpret_cast(i); } \ + inline const wrapper *wrap(const cpp_impl *i) { \ + return reinterpret_cast(i); \ + } #endif // TENSORFLOW_C_CONVERSION_MACROS_H_ diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 42a31444380..fe4d5ac6ffe 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -35,7 +35,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ ":context_interface", @@ -246,6 +246,7 @@ cc_library( "//tensorflow:internal", ], deps = [ + "//tensorflow/c:conversion_macros", "//tensorflow/c:tf_status", "//tensorflow/core:protos_all_cc", "//tensorflow/core/common_runtime/eager:attr_builder", @@ -316,7 +317,8 @@ tf_cuda_cc_test( ], extra_copts = tfe_xla_copts(), tags = [ - "guitar", + "noguitar", # TODO(b/155445984): flaky + #"guitar", "multi_gpu", ], deps = [ @@ -344,7 +346,10 @@ tf_cuda_cc_test( srcs = [ "c_api_remote_test.cc", ], + # TODO(b/136478427): Figure out how to correctly shut the server down + args = ["--heap_check=local"], extra_copts = tfe_xla_copts(), + tags = ["noasan"], # leaks gRPC server instances deps = [ ":c_api", 
":c_api_experimental", @@ -362,6 +367,34 @@ tf_cuda_cc_test( ], ) +tf_cuda_cc_test( + name = "c_api_cluster_test", + size = "small", + srcs = [ + "c_api_cluster_test.cc", + ], + # TODO(b/136478427): Figure out how to correctly shut the server down + args = ["--heap_check=local"], + extra_copts = tfe_xla_copts(), + tags = ["noasan"], # leaks gRPC server instances + deps = [ + ":c_api", + ":c_api_experimental", + ":c_api_internal", + ":c_api_test_util", + ":tfe_tensorhandle_internal", + "//tensorflow/c:c_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/common_runtime/eager:eager_operation", + "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", + "//tensorflow/core/platform:env", + "@com_google_absl//absl/strings", + ], +) + tf_cuda_library( name = "c_api_experimental", srcs = [ @@ -379,7 +412,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib_lite", + "//tensorflow/core:portable_tensorflow_lib_lite", ], "//conditions:default": [ ":c_api", @@ -415,6 +448,8 @@ tf_cuda_library( "//conditions:default": [], }) + [ "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/container:flat_hash_map", "//tensorflow/c:tf_status_helper", "//tensorflow/core/distributed_runtime/eager:eager_client", "//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_client", @@ -575,7 +610,6 @@ filegroup( ], exclude = [ "c_api_experimental.cc", - "*c_api_tfrt*", "*test*", "*dlpack*", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 540efe9dcc0..5c01ccb82bb 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -38,7 +38,7 @@ limitations under the License. #include "tensorflow/c/eager/tfe_tensorhandle_internal.h" #include "tensorflow/c/tf_tensor_internal.h" #ifdef PLATFORM_GOOGLE -#include "tensorflow/c/eager/c_api_tfrt.h" +#include "tensorflow/core/tfrt/eager/c_api_tfrt.h" #endif #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" @@ -500,6 +500,17 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( grpc_server->master_env()->worker_cache->GetEagerClientCache( &remote_eager_workers)); + // For cluster update, use a status group to aggregate statuses from + // * adding and removing remote devices + // * creating remote contexts on newly added workers + // * updating remote contexts on existing workers + // * updating the master context + // Note that we should not return immediately on errors in the middle of these + // updates to prevent cluster from having inconsistent context views. + // + // Unused if `reset_context` is True. 
+ tensorflow::StatusGroup sg; + // When updating an existing context, populate the following lists with: // * added_workers: set(remote_workers) - set(curr_remote_workers) // * removed_workers: set(curr_remote_workers) - set(remote_workers) @@ -535,7 +546,7 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( DifferentiateWorkerLists(&curr_remote_workers, &remote_workers, &added_workers, &removed_workers, &existing_workers); - LOG_AND_RETURN_IF_ERROR(GetReplacedFromExistingWorkers( + sg.Update(GetReplacedFromExistingWorkers( &existing_workers, context_id, context->GetContextViewId(), server_def, remote_eager_workers.get(), &replaced_workers)); if (VLOG_IS_ON(1)) { @@ -559,11 +570,10 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( existing_workers.end()); } } - LOG_AND_RETURN_IF_ERROR( - RemoveRemoteDevicesFromMgr(removed_workers, remote_device_mgr)); - LOG_AND_RETURN_IF_ERROR(AddRemoteDevicesToMgr( - added_workers, grpc_server->master_env()->worker_cache, - remote_device_mgr)); + sg.Update(RemoveRemoteDevicesFromMgr(removed_workers, remote_device_mgr)); + sg.Update(AddRemoteDevicesToMgr(added_workers, + grpc_server->master_env()->worker_cache, + remote_device_mgr)); } std::vector cluster_device_attributes; @@ -584,7 +594,6 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( } // Initialize remote eager workers. - // TODO(b/138847548) Create remote eager contexts in async mode by default. if (reset_context) { LOG_AND_RETURN_IF_ERROR(CreateRemoteContexts( ctx, remote_workers, context_id, context_view_id, keep_alive_secs, @@ -596,7 +605,7 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( // existing workers to also have the updated context_view_id, so // we must set their context_view_id to the existing master's // context_view_id + 1. - LOG_AND_RETURN_IF_ERROR(CreateRemoteContexts( + sg.Update(CreateRemoteContexts( ctx, added_workers, context_id, context_view_id + 1, keep_alive_secs, server_def, remote_eager_workers.get(), context->Executor().Async(), context->LazyCopyFunctionRemoteInputs(), base_request)); @@ -606,10 +615,10 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( VLOG(1) << "Updating cluster with existing worker " << w; } } - LOG_AND_RETURN_IF_ERROR(UpdateRemoteContexts( - ctx, existing_workers, added_workers, removed_workers, context_id, - context_view_id + 1, server_def, remote_eager_workers.get(), - base_request)); + sg.Update(UpdateRemoteContexts(ctx, existing_workers, added_workers, + removed_workers, context_id, + context_view_id + 1, server_def, + remote_eager_workers.get(), base_request)); } } @@ -645,13 +654,13 @@ tensorflow::Status UpdateTFE_ContextWithServerDef( // GrpcServer cannot be destroyed after it is started. 
LOG_AND_RETURN_IF_ERROR(grpc_server->Start()); } else { - LOG_AND_RETURN_IF_ERROR( - grpc_server->worker_env()->session_mgr->UpdateSession( - session_name, server_def, base_request.cluster_device_attributes(), - /*isolate_session_state=*/true)); - LOG_AND_RETURN_IF_ERROR( - context->UpdateRemoteMaster(context_id, std::move(remote_eager_workers), - added_workers, removed_workers)); + sg.Update(grpc_server->worker_env()->session_mgr->UpdateSession( + session_name, server_def, base_request.cluster_device_attributes(), + /*isolate_session_state=*/true)); + sg.Update(context->UpdateRemoteMaster(context_id, + std::move(remote_eager_workers), + added_workers, removed_workers)); + LOG_AND_RETURN_IF_ERROR(sg.as_summary_status()); } #undef LOG_AND_RETURN_IF_ERROR @@ -685,8 +694,13 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { if (opts->use_tfrt) { #ifdef PLATFORM_GOOGLE - status->status = tensorflow::Status::OK(); - return tensorflow::wrap(new tfrt::ContextInterface()); + tfrt::SmallVector op_handler_chains; + tfrt::SmallVector device_attributes; + status->status = tfrt::ListOpHandlerChains( + opts->session_options.options, &op_handler_chains, &device_attributes); + if (!status->status.ok()) return nullptr; + return tensorflow::wrap( + new tfrt::ContextInterface(op_handler_chains, device_attributes)); #else status->status = tensorflow::errors::Unimplemented("TFRT is not supported"); return nullptr; @@ -910,7 +924,7 @@ extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( context->GetDevicePlacementPolicy()); } -TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { +TFE_TensorHandle* TFE_NewTensorHandle(const TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; status->status = tensorflow::TF_TensorToTensor(t, &tensor); if (!status->status.ok()) return nullptr; @@ -1458,20 +1472,21 @@ TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func, } // namespace void TFE_ContextStartStep(TFE_Context* ctx) { - tensorflow::EagerContext* context = - tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); - context->StartStep(); + tensorflow::unwrap(ctx)->StartStep(); } void TFE_ContextEndStep(TFE_Context* ctx) { - tensorflow::EagerContext* context = - tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); - context->EndStep(); + tensorflow::unwrap(ctx)->EndStep(); +} + +const TFE_OpAttrs* TFE_OpGetAttrs(TFE_Op* op) { + return tensorflow::wrap( + &OperationFromInterface(tensorflow::unwrap(op))->Attrs()); } void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs) { tensorflow::AttrValueMap m; - attrs->attributes->FillAttrValueMap(&m); + tensorflow::unwrap(attrs)->FillAttrValueMap(&m); tensorflow::EagerOperation* operation = OperationFromInterface(tensorflow::unwrap(op)); tensorflow::AttrBuilder* destination = operation->MutableAttrs(); @@ -1483,8 +1498,8 @@ void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs) { void TFE_OpAttrsSerialize(const TFE_OpAttrs* attrs, TF_Buffer* buf, TF_Status* status) { tensorflow::NameAttrList name_and_attrs; - attrs->attributes->FillAttrValueMap(name_and_attrs.mutable_attr()); - name_and_attrs.set_name(attrs->attributes->op_name()); + tensorflow::unwrap(attrs)->FillAttrValueMap(name_and_attrs.mutable_attr()); + name_and_attrs.set_name(tensorflow::unwrap(attrs)->op_name()); status->status = MessageToBuffer(name_and_attrs, buf); } @@ -1605,9 +1620,9 @@ class CustomDeviceAPI : public 
tensorflow::CustomDevice { } std::vector outputs(*num_retvals); TF_Status status; - TFE_OpAttrs attributes(&op->Attrs()); device_.execute(context_, inputs.size(), inputs.data(), op->Name().c_str(), - &attributes, num_retvals, outputs.data(), &status, info_); + wrap(&op->Attrs()), num_retvals, outputs.data(), &status, + info_); if (status.status.ok()) { for (int i = 0; i < *num_retvals; ++i) { retvals[i] = tensorflow::TensorHandleFromInterface( diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 070b3a9bb60..5afe3047dd7 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -137,7 +137,7 @@ TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx, // placed in memory of different devices or remote address spaces. typedef struct TFE_TensorHandle TFE_TensorHandle; -TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(const TF_Tensor* t, TF_Status* status); // Indicates that the caller will not be using `h` any more. TF_CAPI_EXPORT extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); diff --git a/tensorflow/c/eager/c_api_cluster_test.cc b/tensorflow/c/eager/c_api_cluster_test.cc new file mode 100644 index 00000000000..252a0408758 --- /dev/null +++ b/tensorflow/c/eager/c_api_cluster_test.cc @@ -0,0 +1,433 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/eager/c_api_internal.h" +#include "tensorflow/c/eager/c_api_test_util.h" +#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" +#include "tensorflow/core/common_runtime/eager/eager_operation.h" +#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h" +#include "tensorflow/core/platform/casts.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/cluster.pb.h" +#include "tensorflow/core/protobuf/tensorflow_server.pb.h" + +namespace { + +using ::tensorflow::string; + +tensorflow::ServerDef GetServerDef(const string& job_name, int num_tasks) { + tensorflow::ServerDef server_def; + server_def.set_protocol("grpc"); + server_def.set_job_name(job_name); + server_def.set_task_index(0); + tensorflow::ClusterDef* cluster_def = server_def.mutable_cluster(); + tensorflow::JobDef* job_def = cluster_def->add_job(); + job_def->set_name(job_name); + for (int i = 0; i < num_tasks; i++) { + int port = tensorflow::testing::PickUnusedPortOrDie(); + job_def->mutable_tasks()->insert( + {i, tensorflow::strings::StrCat("localhost:", port)}); + } + return server_def; +} + +tensorflow::ServerDef GetServerDef(int num_tasks) { + return GetServerDef("localhost", num_tasks); +} + +void ReplaceTaskInServerDef(tensorflow::ServerDef* server_def, int task_index) { + tensorflow::JobDef* job_def = server_def->mutable_cluster()->mutable_job(0); + int port = tensorflow::testing::PickUnusedPortOrDie(); + job_def->mutable_tasks()->at(task_index) = + tensorflow::strings::StrCat("localhost:", port); +} + +void CheckTFE_TensorHandleHasFloats(TFE_TensorHandle* handle, + const std::vector& expected_values) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_Tensor* t = TFE_TensorHandleResolve(handle, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + std::unique_ptr actual_values(new float[expected_values.size()]); + EXPECT_EQ(sizeof(float) * expected_values.size(), TF_TensorByteSize(t)); + memcpy(actual_values.get(), TF_TensorData(t), TF_TensorByteSize(t)); + TF_DeleteTensor(t); + + for (int i = 0; i < expected_values.size(); i++) { + EXPECT_EQ(expected_values[i], actual_values[i]) + << "Mismatch in expected values at (zero-based) index " << i; + } +} + +void CheckRemoteMatMulExecutesOK(TFE_Context* ctx, + const char* remote_device_name, + const char* local_device_name) { + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle(ctx); + + TFE_Op* matmul = MatMulOp(ctx, h0_task0, h0_task0); + TFE_OpSetDevice(matmul, remote_device_name, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + TFE_TensorHandle* retvals[1]; + int num_retvals = 1; + TFE_Execute(matmul, &retvals[0], &num_retvals, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + auto* retval_task0 = + TFE_TensorHandleCopyToDevice(retvals[0], ctx, local_device_name, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + CheckTFE_TensorHandleHasFloats(retval_task0, {7, 10, 15, 22}); + + TFE_DeleteTensorHandle(retval_task0); + TFE_DeleteTensorHandle(h0_task0); + TFE_DeleteTensorHandle(retvals[0]); + + TFE_DeleteOp(matmul); + + TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); + TFE_ExecutorWaitForAllPendingNodes(executor, 
status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteExecutor(executor); + TF_DeleteStatus(status); +} + +// Read the value of variable `var` and save it into `out_value`. +void ReadVariable(TFE_Context* ctx, TFE_TensorHandle* var, + TFE_TensorHandle** out_value) { + TF_Status* status = TF_NewStatus(); + TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpAddInput(op, var, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + int num_retvals = 1; + TFE_Execute(op, out_value, &num_retvals, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + TF_DeleteStatus(status); +} + +void TestRemoteExecuteChangeServerDef(bool async) { + tensorflow::ServerDef server_def = GetServerDef(2); + + // This server def has the task index set to 0. + string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + + std::unique_ptr worker_server; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + const char remote_device_name[] = + "/job:localhost/replica:0/task:1/device:CPU:0"; + const char local_device_name[] = + "/job:localhost/replica:0/task:0/device:CPU:0"; + CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); + + TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); + TFE_ExecutorWaitForAllPendingNodes(executor, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); + + // Update the server def with a new set of names (worker instead of + // localhost). + tensorflow::ServerDef updated_server_def = GetServerDef("worker", 2); + serialized = updated_server_def.SerializeAsString(); + + updated_server_def.set_task_index(1); + tensorflow::Status s = tensorflow::GrpcServer::Create( + updated_server_def, tensorflow::Env::Default(), &worker_server); + ASSERT_TRUE(s.ok()) << s.error_message(); + ASSERT_TRUE(worker_server->Start().ok()); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // Create a new tensor_handle. + TFE_TensorHandle* h0_task0_new = TestMatrixTensorHandle(ctx); + + // Check that copying it to the old remote device (named localhost) fails. + TFE_TensorHandleCopyToDevice(h0_task0_new, ctx, remote_device_name, status); + EXPECT_NE(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // Copying and executing on the new remote device works. 
+ const char new_remote_device_name[] = + "/job:worker/replica:0/task:1/device:CPU:0"; + const char new_local_device_name[] = + "/job:worker/replica:0/task:0/device:CPU:0"; + + auto* h0_task1_new = TFE_TensorHandleCopyToDevice( + h0_task0_new, ctx, new_remote_device_name, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + TFE_DeleteTensorHandle(h0_task0_new); + TFE_DeleteTensorHandle(h0_task1_new); + + CheckRemoteMatMulExecutesOK(ctx, new_remote_device_name, + new_local_device_name); + + TFE_ExecutorWaitForAllPendingNodes(executor, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteExecutor(executor); + + TF_DeleteStatus(status); + + TFE_DeleteContext(ctx); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); +} + +TEST(CAPI, RemoteExecuteChangeServerDef) { + TestRemoteExecuteChangeServerDef(false); +} +TEST(CAPI, RemoteExecuteChangeServerDefAsync) { + TestRemoteExecuteChangeServerDef(true); +} + +void TestRemoteExecuteUpdateServerDef(bool async) { + tensorflow::ServerDef server_def = GetServerDef(2); + // This server def has the task index set to 0. + string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr worker_server; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + const char local_device_name[] = + "/job:localhost/replica:0/task:0/device:CPU:0"; + const char remote_device_name[] = + "/job:localhost/replica:0/task:1/device:CPU:0"; + CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); + + TFE_ContextUpdateServerDef(ctx, 0, serialized.data(), serialized.size(), + status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); + + TFE_DeleteContext(ctx); + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); +} + +TEST(CAPI, RemoteExecuteUpdateServerDef) { + TestRemoteExecuteUpdateServerDef(false); +} + +TEST(CAPI, RemoteExecuteUpdateServerDefAsync) { + TestRemoteExecuteUpdateServerDef(true); +} + +void TestRemoteExecuteUpdateServerDefResourceAccess(bool async) { + tensorflow::ServerDef server_def = GetServerDef(2); + // This server def has the task index set to 0. 
+ string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr worker_server; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + const char dev0_name[] = "/job:localhost/replica:0/task:0/device:CPU:0"; + const char dev1_name[] = "/job:localhost/replica:0/task:1/device:CPU:0"; + + TFE_TensorHandle* var_handle0 = TestVariable(ctx, 1.0, dev0_name); + EXPECT_NE(var_handle0, nullptr); + TFE_TensorHandle* var_handle1 = TestVariable(ctx, 2.0, dev1_name); + EXPECT_NE(var_handle1, nullptr); + + TFE_TensorHandle* value_handle = nullptr; + ReadVariable(ctx, var_handle1, &value_handle); + CheckTFE_TensorHandleHasFloats(value_handle, {2}); + TFE_DeleteTensorHandle(value_handle); + + // Start a new worker to replace task:1 + ReplaceTaskInServerDef(&server_def, 1); + server_def.set_task_index(1); + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + // Update server def to replace the remote device with the device info on the + // new worker (different incarnation ID). + server_def.set_task_index(0); + string serialized_update = server_def.SerializeAsString(); + TFE_ContextUpdateServerDef(ctx, 0, serialized_update.data(), + serialized_update.size(), status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // The device of var_handle0 is local device which is the same before and + // after cluster update. Remove resource with valid device should succeed. + TFE_Op* op = TFE_NewOp(ctx, "DestroyResourceOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, var_handle0, status); + TFE_OpSetDevice(op, dev0_name, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + int num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + + // The device of var_handle1 is remote device, which was replaced during + // cluster update. Removing resource with invalid device should fail + // gracefully (i.e., with error status) instead of crashing with segfaults. + op = TFE_NewOp(ctx, "DestroyResourceOp", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(op, var_handle1, status); + TFE_OpSetDevice(op, dev1_name, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + EXPECT_NE(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteOp(op); + + TFE_DeleteTensorHandle(var_handle0); + TFE_DeleteTensorHandle(var_handle1); + + TFE_DeleteContext(ctx); + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. 
+ worker_server.release(); +} + +TEST(CAPI, TestRemoteExecuteUpdateServerDefResourceAccess) { + TestRemoteExecuteUpdateServerDefResourceAccess(false); +} + +TEST(CAPI, TestRemoteExecuteUpdateServerDefResourceAccessAsync) { + TestRemoteExecuteUpdateServerDefResourceAccess(true); +} + +void TestRemoteExecuteUpdateServerDefWithFailures(bool async) { + // Fail fast on GetStatus requests so we can get errors instead of timeouts + // when updating the cluster with a non-existent worker. + tensorflow::setenv("GRPC_FAIL_FAST", "TRUE", /*overwrite=*/1); + + tensorflow::ServerDef server_def = GetServerDef(2); + // This server def has the task index set to 0. + string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr<tensorflow::GrpcServer> worker_server; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server) + .ok()); + ASSERT_TRUE(worker_server->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast<bool>(async)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + const char local_device_name[] = + "/job:localhost/replica:0/task:0/device:CPU:0"; + const char remote_device_name[] = + "/job:localhost/replica:0/task:1/device:CPU:0"; + CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); + + // Adding a non-existent remote worker to cluster def. This should cause the + // UpdateServerDef call to fail. + tensorflow::ClusterDef* cluster_def = server_def.mutable_cluster(); + tensorflow::JobDef* job_def = cluster_def->mutable_job(0); + int port = tensorflow::testing::PickUnusedPortOrDie(); + job_def->mutable_tasks()->insert( + {2, tensorflow::strings::StrCat("localhost:", port)}); + server_def.set_task_index(0); + string serialized_update = server_def.SerializeAsString(); + TFE_ContextUpdateServerDef(ctx, 0, serialized_update.data(), + serialized_update.size(), status); + EXPECT_NE(TF_OK, TF_GetCode(status)) << TF_Message(status); + + // Even after the previously failed cluster update, another update and op + // execution should work fine as long as the provided server_def is valid. + TFE_ContextUpdateServerDef(ctx, 0, serialized.data(), serialized.size(), + status); + EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); + + TFE_DeleteContext(ctx); + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server.release(); + tensorflow::unsetenv("GRPC_FAIL_FAST"); +} + +TEST(CAPI, RemoteExecuteUpdateServerDefWithFailures) { + TestRemoteExecuteUpdateServerDefWithFailures(false); +} + +TEST(CAPI, RemoteExecuteUpdateServerDefWithFailuresAsync) { + TestRemoteExecuteUpdateServerDefWithFailures(true); +} + +} // namespace diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc index 820650e315f..0d71b11531b 100644 --- a/tensorflow/c/eager/c_api_experimental.cc +++ b/tensorflow/c/eager/c_api_experimental.cc @@ -23,6 +23,7 @@ limitations under the License.
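A condensed sketch of the recovery behaviour the tests above exercise: a failed TFE_ContextUpdateServerDef leaves the eager context usable, so a follow-up update with a valid ServerDef still succeeds. This assumes an existing TFE_Context* ctx, a TF_Status* status, and two serialized tensorflow::ServerDef strings; bad_update and serialized are illustrative names only:

  TFE_ContextUpdateServerDef(ctx, /*keep_alive_secs=*/0, bad_update.data(),
                             bad_update.size(), status);
  if (TF_GetCode(status) != TF_OK) {
    // The failed update (e.g. a cluster def naming an unreachable worker) does
    // not poison the context; retry with the known-good server def.
    TFE_ContextUpdateServerDef(ctx, 0, serialized.data(), serialized.size(),
                               status);
  }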
#include "tensorflow/c/eager/tfe_op_internal.h" #include "tensorflow/c/eager/tfe_tensorhandle_internal.h" #include "tensorflow/c/tf_status_helper.h" +#include "tensorflow/core/common_runtime/composite_device.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/lib/monitoring/counter.h" @@ -638,3 +639,35 @@ TFE_TensorHandle* TFE_NewTensorHandleFromTensor(TFE_Context* ctx, TF_Tensor* t, return tensorflow::wrap( tensorflow::unwrap(ctx)->CreateLocalHandle(t->tensor)); } + +TFE_TensorHandle* TFE_CreatePackedTensorHandle(TFE_Context* ctx, + TFE_TensorHandle** handles, + int* num_handles, + TF_Status* status) { + std::vector tensor_handles; + tensor_handles.reserve(*num_handles); + for (int i = 0; i < *num_handles; ++i) { + tensor_handles.push_back( + tensorflow::TensorHandleFromInterface(tensorflow::unwrap(handles[i]))); + } + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + tensorflow::TensorHandle* handle = nullptr; + status->status = tensorflow::TensorHandle::CreatePackedHandle( + std::move(tensor_handles), context, &handle); + return tensorflow::wrap(handle); +} + +void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx, unsigned char enable, + TF_Status* status) { + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + context->SetAllowSoftPlacement(enable); +} + +void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx, unsigned char enable, + TF_Status* status) { + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + context->SetLogDevicePlacement(enable); +} diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h index d1e99d86180..1b8efe61ee0 100644 --- a/tensorflow/c/eager/c_api_experimental.h +++ b/tensorflow/c/eager/c_api_experimental.h @@ -431,6 +431,9 @@ TF_CAPI_EXPORT extern void TFE_HostAddressSpace(TFE_Context* ctx, // A reference to an op's name -> attribute mapping typedef struct TFE_OpAttrs TFE_OpAttrs; +// Fetch a reference to `op`'s attributes. The returned reference is only valid +// while `op` is alive. +const TFE_OpAttrs* TFE_OpGetAttrs(TFE_Op* op); // Add attributes in `attrs` to `op`. // // Does not overwrite or update existing attributes, but adds new ones. @@ -538,6 +541,26 @@ TF_CAPI_EXPORT extern TF_Tensor* TFE_AllocateHostTensor(TFE_Context* ctx, TF_CAPI_EXPORT TFE_TensorHandle* TFE_NewTensorHandleFromTensor( TFE_Context* ctx, TF_Tensor* t, TF_Status* status); +// Create a packed TensorHandle with the given list of TensorHandles. +// If `handles` are on the same device, assign the same device to the packed +// handle; if `handles` are on different deivces, assign a CompositeDevice to +// it. +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_CreatePackedTensorHandle( + TFE_Context* ctx, TFE_TensorHandle** handles, int* num_handles, + TF_Status* status); + +// Configure soft device placement policy for the eager executor. Note this +// policy is applied to any subsequent op executions. +TF_CAPI_EXPORT void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx, + unsigned char enable, + TF_Status* status); + +// Configure device placement policy logging for the eager executor. Note this +// policy is applied to any subsequent op executions. 
+TF_CAPI_EXPORT void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx, + unsigned char enable, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/eager/c_api_remote_test.cc b/tensorflow/c/eager/c_api_remote_test.cc index 7c6836af69b..d04e4ef4212 100644 --- a/tensorflow/c/eager/c_api_remote_test.cc +++ b/tensorflow/c/eager/c_api_remote_test.cc @@ -168,7 +168,11 @@ string MatMulFunction() { return def.SerializeAsString(); } -void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { +// If heavy_load_on_streaming_rpc is true, send some RPC requests before the one +// that creates a remote input, to simulate a scenario in which the remote +// input is not ready when we start running an op or a function. +void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func, + bool heavy_load_on_streaming_rpc) { tensorflow::ServerDef server_def = GetServerDef(3); // This server def has the task index set to 0. @@ -193,47 +197,62 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { TFE_ContextOptionsSetAsync(opts, static_cast<bool>(async)); TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); TFE_Context* ctx = TFE_NewContext(opts, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_DeleteContextOptions(opts); TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle(ctx); TFE_TensorHandle* h1_task0 = TestMatrixTensorHandle(ctx); + std::vector<TFE_TensorHandle*> handles_task0; + if (heavy_load_on_streaming_rpc) { + // Send 50 tensor copy requests to simulate that some RPC requests have + // already been enqueued. + for (int i = 0; i < 50; ++i) { + handles_task0.push_back(TestMatrixTensorHandle(ctx)); + } + } const char task1_name[] = "/job:localhost/replica:0/task:1/device:CPU:0"; const char task2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0"; + std::vector<TFE_TensorHandle*> handles_task2; + for (auto* h_task0 : handles_task0) { + handles_task2.push_back( + TFE_TensorHandleCopyToDevice(h_task0, ctx, task2_name, status)); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + } + auto* h1_task2 = TFE_TensorHandleCopyToDevice(h1_task0, ctx, task2_name, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_Op* matmul = nullptr; if (func) { string function_def = MatMulFunction(); TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), status); - CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); matmul = TFE_NewOp(ctx, "MatMulFunction", status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_OpAddInput(matmul, h0_task0, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_OpAddInput(matmul, h1_task2, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); } else { // Handles are on task0 (local), and task2, but op is on task1.
matmul = MatMulOp(ctx, h0_task0, h1_task2); } if (remote) { TFE_OpSetDevice(matmul, task1_name, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); } else if (!async) { // Set the local device to CPU to easily validate mirroring string cpu_device_name; ASSERT_TRUE(GetDeviceName(ctx, &cpu_device_name, "CPU")); TFE_OpSetDevice(matmul, cpu_device_name.c_str(), status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); auto remote_arg = tensorflow::TensorHandleFromInterface(tensorflow::unwrap(h1_task2)); // The input handles should never change since they have been mirrored. @@ -243,7 +262,7 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { TFE_TensorHandle* retvals[1]; int num_retvals = 1; TFE_Execute(matmul, &retvals[0], &num_retvals, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); // TODO(gjn): Add support for waiting on async local mirrors if (!remote && !async) { @@ -255,10 +274,10 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { auto* retval_task0 = TFE_TensorHandleCopyToDevice( retvals[0], ctx, "/job:localhost/replica:0/task:0/device:CPU:0", status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TF_Tensor* t = TFE_TensorHandleResolve(retval_task0, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_DeleteTensorHandle(retval_task0); float product[4] = {0}; EXPECT_EQ(sizeof(product), TF_TensorByteSize(t)); @@ -273,12 +292,18 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { TFE_DeleteTensorHandle(h1_task0); TFE_DeleteTensorHandle(h1_task2); TFE_DeleteTensorHandle(retvals[0]); + for (auto* h : handles_task0) { + TFE_DeleteTensorHandle(h); + } + for (auto* h : handles_task2) { + TFE_DeleteTensorHandle(h); + } TFE_DeleteOp(matmul); TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); TFE_ExecutorWaitForAllPendingNodes(executor, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_DeleteExecutor(executor); if (func) { TFE_ContextRemoveFunction(ctx, "MatMulFunction", status); @@ -293,22 +318,260 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { } TEST(CAPI, RemoteExecuteSilentCopies) { - TestRemoteExecuteSilentCopies(false, true, false); + TestRemoteExecuteSilentCopies(/*async=*/false, /*remote=*/true, + /*func=*/false, + /*heavy_load_on_streaming_rpc=*/false); } TEST(CAPI, RemoteExecuteSilentCopiesAsync) { - TestRemoteExecuteSilentCopies(true, true, false); + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/true, /*func=*/false, + /*heavy_load_on_streaming_rpc=*/false); } TEST(CAPI, RemoteExecuteSilentCopiesAsyncFunc) { - TestRemoteExecuteSilentCopies(true, true, true); + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/true, /*func=*/true, + /*heavy_load_on_streaming_rpc=*/false); } TEST(CAPI, RemoteExecuteSilentCopiesLocal) { - TestRemoteExecuteSilentCopies(false, false, false); + TestRemoteExecuteSilentCopies(/*async=*/false, /*remote=*/false, + /*func=*/false, + /*heavy_load_on_streaming_rpc=*/false); } TEST(CAPI, RemoteExecuteSilentCopiesLocalAsync) { - 
TestRemoteExecuteSilentCopies(true, false, false); + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, + /*func=*/false, + /*heavy_load_on_streaming_rpc=*/false); } TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFunc) { - TestRemoteExecuteSilentCopies(true, false, true); + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true, + /*heavy_load_on_streaming_rpc=*/false); +} +TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFuncOrdering) { + // A remote input may be not ready when we start running a function. Test that + // the function execution should wait until the remote input is ready. + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true, + /*heavy_load_on_streaming_rpc=*/true); +} + +// Add the values of three variables on three different tasks. +string AddVariablesFunction() { + tensorflow::FunctionDef def; + CHECK(tensorflow::protobuf::TextFormat::ParseFromString( + " signature {" + " name: 'AddVariablesFunction'" + " input_arg {" + " name: 'var'" + " type: DT_RESOURCE" + " }" + " output_arg {" + " name: 'sum'" + " type: DT_FLOAT" + " }" + " }" + " node_def {" + " name: 'read0'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:0/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'read1'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:1/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'read2'" + " op: 'ReadVariableOp'" + " input: 'var'" + " device: '/job:localhost/replica:0/task:2/device:CPU:0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'add1'" + " op: 'Add'" + " input: 'read0:value:0'" + " input: 'read1:value:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'add2'" + " op: 'Add'" + " input: 'add1:z:0'" + " input: 'read2:value:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " ret {" + " key: 'sum'" + " value: 'add2:z:0'" + " }", + &def)); + return def.SerializeAsString(); +} + +void VarIsInitialized(TFE_Context* ctx, TFE_TensorHandle* var_handle) { + TF_Status* status = TF_NewStatus(); + TFE_Op* op = TFE_NewOp(ctx, "VarIsInitializedOp", status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_OpAddInput(op, var_handle, status); + TFE_TensorHandle* is_initialized[1] = {nullptr}; + int num_retvals = 1; + TFE_Execute(op, &is_initialized[0], &num_retvals, status); + CHECK_EQ(1, num_retvals); + TF_Tensor* t = TFE_TensorHandleResolve(is_initialized[0], status); + bool initialized = false; + memcpy(&initialized, TF_TensorData(t), TF_TensorByteSize(t)); + EXPECT_EQ(initialized, true); + TF_DeleteTensor(t); + TFE_DeleteTensorHandle(is_initialized[0]); + TFE_DeleteOp(op); + delete status; +} + +void TestFunctionWithPackedInput(const bool remote) { + tensorflow::ServerDef server_def = GetServerDef(3); + + // This server def has the task index set to 0. 
+ string serialized = server_def.SerializeAsString(); + + server_def.set_task_index(1); + std::unique_ptr worker_server1; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server1) + .ok()); + ASSERT_TRUE(worker_server1->Start().ok()); + + server_def.set_task_index(2); + std::unique_ptr worker_server2; + ASSERT_TRUE(tensorflow::GrpcServer::Create( + server_def, tensorflow::Env::Default(), &worker_server2) + .ok()); + ASSERT_TRUE(worker_server2->Start().ok()); + + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(/*enable=*/true)); + TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_Context* ctx = TFE_NewContext(opts, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + const char task0_name[] = "/job:localhost/replica:0/task:0/device:CPU:0"; + const char task1_name[] = "/job:localhost/replica:0/task:1/device:CPU:0"; + const char task2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0"; + + // Create one variable per task. + TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task0_name); + TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task1_name); + TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task2_name); + + // Add a sync point in order to make sure that variables have been initialized + // before the function execution starts. + // TODO(b/155789951): Remove once b/155789951 is fixed. + VarIsInitialized(ctx, h1); + VarIsInitialized(ctx, h2); + + // Pack 3 variable handles into one TFE_TensorHandle. + int num_replicas = 3; + std::vector handles = {h0, h1, h2}; + TFE_TensorHandle* packed_handle = + TFE_CreatePackedTensorHandle(ctx, handles.data(), &num_replicas, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + EXPECT_EQ(TFE_TensorHandleDataType(packed_handle), TF_RESOURCE); + EXPECT_EQ(TFE_TensorHandleNumDims(packed_handle, status), 0); + EXPECT_EQ(TFE_TensorHandleNumElements(packed_handle, status), 1); + + const string composite_device_name = + "/job:localhost/replica:0/task:0/device:COMPOSITE:0"; + EXPECT_EQ(TFE_TensorHandleDeviceName(packed_handle, status), + composite_device_name); + EXPECT_EQ(TFE_TensorHandleBackingDeviceName(packed_handle, status), + composite_device_name); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + // Register and run a function which returns the sum of 3 variables. 
+ const string function_def = AddVariablesFunction(); + TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), + status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + TFE_Op* func = TFE_NewOp(ctx, "AddVariablesFunction", status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_OpAddInput(func, packed_handle, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + if (remote) { + TFE_OpSetDevice(func, task1_name, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + } + + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; + TFE_Execute(func, &retvals[0], &num_retvals, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + ASSERT_EQ(1, num_retvals); + TFE_DeleteOp(func); + TFE_DeleteTensorHandle(packed_handle); + TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteTensorHandle(retvals[0]); + float sum = 0; + EXPECT_EQ(sizeof(sum), TF_TensorByteSize(t)); + memcpy(&sum, TF_TensorData(t), TF_TensorByteSize(t)); + TF_DeleteTensor(t); + EXPECT_EQ(sum, 6.0); + + TFE_DeleteTensorHandle(h0); + TFE_DeleteTensorHandle(h1); + TFE_DeleteTensorHandle(h2); + + TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); + TFE_ExecutorWaitForAllPendingNodes(executor, status); + ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_DeleteExecutor(executor); + TFE_ContextRemoveFunction(ctx, "AddVariablesFunction", status); + TFE_DeleteContext(ctx); + + TF_DeleteStatus(status); + + // TODO(b/136478427): Figure out how to correctly shut the server down. + worker_server1.release(); + worker_server2.release(); +} + +TEST(CAPI, TestLocalFunctionWithPackedInput) { + TestFunctionWithPackedInput(/*remote=*/false); +} + +TEST(CAPI, TestRemoteFunctionWithPackedInput) { + TestFunctionWithPackedInput(/*remote=*/true); } void TestRemoteExecuteDeleteContextWithOutstandingRPC(bool async) { @@ -381,150 +644,4 @@ TEST(CAPI, RemoteExecuteDeleteContextWithOutstandingRPC) { TEST(CAPI, RemoteExecuteDeleteContextWithOutstandingRPCAsync) { TestRemoteExecuteDeleteContextWithOutstandingRPC(true); } - -void CheckTFE_TensorHandleHasFloats(TFE_TensorHandle* handle, - const std::vector& expected_values) { - std::unique_ptr status( - TF_NewStatus(), TF_DeleteStatus); - TF_Tensor* t = TFE_TensorHandleResolve(handle, status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - std::unique_ptr actual_values(new float[expected_values.size()]); - EXPECT_EQ(sizeof(float) * expected_values.size(), TF_TensorByteSize(t)); - memcpy(actual_values.get(), TF_TensorData(t), TF_TensorByteSize(t)); - TF_DeleteTensor(t); - - for (int i = 0; i < expected_values.size(); i++) { - EXPECT_EQ(expected_values[i], actual_values[i]) - << "Mismatch in expected values at (zero-based) index " << i; - } -} - -void CheckRemoteMatMulExecutesOK(TFE_Context* ctx, - const char* remote_device_name, - const char* local_device_name) { - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle(ctx); - - TFE_Op* matmul = MatMulOp(ctx, h0_task0, h0_task0); - TFE_OpSetDevice(matmul, remote_device_name, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - TFE_TensorHandle* retvals[1]; - int num_retvals = 1; - TFE_Execute(matmul, &retvals[0], &num_retvals, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - auto* retval_task0 = - 
TFE_TensorHandleCopyToDevice(retvals[0], ctx, local_device_name, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - CheckTFE_TensorHandleHasFloats(retval_task0, {7, 10, 15, 22}); - - TFE_DeleteTensorHandle(retval_task0); - TFE_DeleteTensorHandle(h0_task0); - TFE_DeleteTensorHandle(retvals[0]); - - TFE_DeleteOp(matmul); - - TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); - TFE_ExecutorWaitForAllPendingNodes(executor, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_DeleteExecutor(executor); - TF_DeleteStatus(status); -} - -void TestRemoteExecuteChangeServerDef(bool async) { - tensorflow::ServerDef server_def = GetServerDef(2); - - // This server def has the task index set to 0. - string serialized = server_def.SerializeAsString(); - - server_def.set_task_index(1); - - std::unique_ptr worker_server; - ASSERT_TRUE(tensorflow::GrpcServer::Create( - server_def, tensorflow::Env::Default(), &worker_server) - .ok()); - ASSERT_TRUE(worker_server->Start().ok()); - - TF_Status* status = TF_NewStatus(); - TFE_ContextOptions* opts = TFE_NewContextOptions(); - TFE_ContextOptionsSetAsync(opts, static_cast(async)); - TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); - TFE_Context* ctx = TFE_NewContext(opts, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_DeleteContextOptions(opts); - - TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - const char remote_device_name[] = - "/job:localhost/replica:0/task:1/device:CPU:0"; - const char local_device_name[] = - "/job:localhost/replica:0/task:0/device:CPU:0"; - CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name); - - TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx); - TFE_ExecutorWaitForAllPendingNodes(executor, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - // TODO(b/136478427): Figure out how to correctly shut the server down. - worker_server.release(); - - // Update the server def with a new set of names (worker instead of - // localhost). - tensorflow::ServerDef updated_server_def = GetServerDef("worker", 2); - serialized = updated_server_def.SerializeAsString(); - - updated_server_def.set_task_index(1); - tensorflow::Status s = tensorflow::GrpcServer::Create( - updated_server_def, tensorflow::Env::Default(), &worker_server); - ASSERT_TRUE(s.ok()) << s.error_message(); - ASSERT_TRUE(worker_server->Start().ok()); - - TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - // Create a new tensor_handle. - TFE_TensorHandle* h0_task0_new = TestMatrixTensorHandle(ctx); - - // Check that copying it to the old remote device (named localhost) fails. - TFE_TensorHandleCopyToDevice(h0_task0_new, ctx, remote_device_name, status); - EXPECT_NE(TF_OK, TF_GetCode(status)) << TF_Message(status); - - // Copying and executing on the new remote device works. 
- const char new_remote_device_name[] = - "/job:worker/replica:0/task:1/device:CPU:0"; - const char new_local_device_name[] = - "/job:worker/replica:0/task:0/device:CPU:0"; - - auto* h0_task1_new = TFE_TensorHandleCopyToDevice( - h0_task0_new, ctx, new_remote_device_name, status); - EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - - TFE_DeleteTensorHandle(h0_task0_new); - TFE_DeleteTensorHandle(h0_task1_new); - - CheckRemoteMatMulExecutesOK(ctx, new_remote_device_name, - new_local_device_name); - - TFE_ExecutorWaitForAllPendingNodes(executor, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_DeleteExecutor(executor); - - TF_DeleteStatus(status); - - TFE_DeleteContext(ctx); - - // TODO(b/136478427): Figure out how to correctly shut the server down. - worker_server.release(); -} - -TEST(CAPI, RemoteExecuteChangeServerDef) { - TestRemoteExecuteChangeServerDef(false); -} -TEST(CAPI, RemoteExecuteChangeServerDefAsync) { - TestRemoteExecuteChangeServerDef(true); -} - } // namespace diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 0e4183dad16..724176505ba 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -1132,51 +1132,6 @@ void BM_ExecuteFunction(int iters, int async) { } BENCHMARK(BM_ExecuteFunction)->Arg(0)->Arg(1); -TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value, - TF_Status* status) { - // Create the variable handle. - TFE_Op* op = TFE_NewOp(ctx, "VarHandleOp", status); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - TFE_OpSetAttrShape(op, "shape", {}, 0, status); - TFE_OpSetAttrString(op, "container", "", 0); - TFE_OpSetAttrString(op, "shared_name", "", 0); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_TensorHandle* var_handle = nullptr; - int num_retvals = 1; - TFE_Execute(op, &var_handle, &num_retvals, status); - TFE_DeleteOp(op); - if (TF_GetCode(status) != TF_OK) return nullptr; - CHECK_EQ(1, num_retvals); - - // Assign 'value' to it. - op = TFE_NewOp(ctx, "AssignVariableOp", status); - if (TF_GetCode(status) != TF_OK) return nullptr; - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - TFE_OpAddInput(op, var_handle, status); - - // Convert 'value' to a TF_Tensor then a TFE_TensorHandle. - std::unique_ptr t( - TF_AllocateTensor(TF_FLOAT, nullptr, 0, sizeof(value)), TF_DeleteTensor); - memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); - - std::unique_ptr - value_handle(TFE_NewTensorHandle(t.get(), status), - TFE_DeleteTensorHandle); - if (TF_GetCode(status) != TF_OK) return nullptr; - - TFE_OpAddInput(op, value_handle.get(), status); - if (TF_GetCode(status) != TF_OK) return nullptr; - - num_retvals = 0; - TFE_Execute(op, nullptr, &num_retvals, status); - TFE_DeleteOp(op); - if (TF_GetCode(status) != TF_OK) return nullptr; - CHECK_EQ(0, num_retvals); - - return var_handle; -} - TEST(CAPI, Variables) { // Variables use resource handles, so this is really a test for resource // tensor handling. 
@@ -1186,7 +1141,7 @@ TEST(CAPI, Variables) { ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); - TFE_TensorHandle* var_handle = CreateVariable(ctx, 12.0, status); + TFE_TensorHandle* var_handle = TestVariable(ctx, 12.0); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); @@ -1227,7 +1182,7 @@ void BM_ReadVariable(int iters) { CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); - TFE_TensorHandle* var_handle = CreateVariable(ctx, 5.0, status); + TFE_TensorHandle* var_handle = TestVariable(ctx, 5.0); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* op = TFE_NewOp(ctx, "ReadVariableOp", status); @@ -1248,6 +1203,8 @@ void BM_ReadVariable(int iters) { CHECK_EQ(0, TFE_TensorHandleNumDims(h, status)); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); h = nullptr; + TFE_OpAddInput(op, var_handle, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } tensorflow::testing::StopTiming(); TFE_DeleteOp(op); @@ -1591,15 +1548,11 @@ TEST(CAPI, TestTFE_OpAddAttrs) { TFE_Op* var_op = TFE_NewOp(ctx, "VarHandleOp", status); TFE_OpSetAttrType(var_op, "dtype", TF_INT64); TFE_OpSetAttrShape(var_op, "shape", {}, 0, status); - // There is currently no API to fetch attributes from an operation, fetching - // happens only as an implementation detail of custom devices. - tensorflow::EagerOperation* operation = - OperationFromInterface(tensorflow::unwrap(var_op)); - TFE_OpAttrs attributes{&operation->Attrs()}; + const TFE_OpAttrs* attributes = TFE_OpGetAttrs(var_op); TFE_Op* copy_op = TFE_NewOp(ctx, "VarHandleOp", status); TFE_OpSetAttrType(copy_op, "dtype", TF_FLOAT); - TFE_OpAddAttrs(copy_op, &attributes); + TFE_OpAddAttrs(copy_op, attributes); unsigned char is_list = 0; ASSERT_EQ(TF_ATTR_TYPE, TFE_OpGetAttrType(copy_op, "dtype", &is_list, status)); @@ -1631,14 +1584,10 @@ TEST(CAPI, TestTFE_OpAttrsSerialize) { CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_OpSetAttrType(var_op, "dtype", TF_INT64); TFE_OpSetAttrShape(var_op, "shape", {}, 0, status); - // There is currently no API to fetch attributes from an operation, fetching - // happens only as an implementation detail of custom devices. - tensorflow::EagerOperation* operation = - OperationFromInterface(tensorflow::unwrap(var_op)); - TFE_OpAttrs attributes{&operation->Attrs()}; + const TFE_OpAttrs* attributes = TFE_OpGetAttrs(var_op); TF_Buffer* serialized_attr_values = TF_NewBuffer(); - TFE_OpAttrsSerialize(&attributes, serialized_attr_values, status); + TFE_OpAttrsSerialize(attributes, serialized_attr_values, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); tensorflow::NameAttrList name_and_attrs; ASSERT_TRUE(name_and_attrs.ParseFromArray(serialized_attr_values->data, diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index e67e17963b3..29b624b8537 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -133,6 +133,58 @@ TFE_TensorHandle* TestMatrixTensorHandle3X2(TFE_Context* ctx) { return th; } +TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value, + const tensorflow::string& device_name) { + TF_Status* status = TF_NewStatus(); + // Create the variable handle. 
+ TFE_Op* op = TFE_NewOp(ctx, "VarHandleOp", status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpSetAttrShape(op, "shape", {}, 0, status); + TFE_OpSetAttrString(op, "container", "", 0); + TFE_OpSetAttrString(op, "shared_name", "", 0); + if (!device_name.empty()) { + TFE_OpSetDevice(op, device_name.c_str(), status); + } + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_TensorHandle* var_handle = nullptr; + int num_retvals = 1; + TFE_Execute(op, &var_handle, &num_retvals, status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_DeleteOp(op); + if (TF_GetCode(status) != TF_OK) return nullptr; + CHECK_EQ(1, num_retvals); + + // Assign 'value' to it. + op = TFE_NewOp(ctx, "AssignVariableOp", status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + TFE_OpAddInput(op, var_handle, status); + + // Convert 'value' to a TF_Tensor then a TFE_TensorHandle. + std::unique_ptr t( + TF_AllocateTensor(TF_FLOAT, nullptr, 0, sizeof(value)), TF_DeleteTensor); + memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); + + std::unique_ptr + value_handle(TFE_NewTensorHandle(t.get(), status), + TFE_DeleteTensorHandle); + if (TF_GetCode(status) != TF_OK) return nullptr; + + TFE_OpAddInput(op, value_handle.get(), status); + if (TF_GetCode(status) != TF_OK) return nullptr; + + num_retvals = 0; + TFE_Execute(op, nullptr, &num_retvals, status); + TFE_DeleteOp(op); + if (TF_GetCode(status) != TF_OK) return nullptr; + CHECK_EQ(0, num_retvals); + + TF_DeleteStatus(status); + + return var_handle; +} + TFE_Op* AddOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { TF_Status* status = TF_NewStatus(); diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h index 11ae6d1181b..4c43f8d5833 100644 --- a/tensorflow/c/eager/c_api_test_util.h +++ b/tensorflow/c/eager/c_api_test_util.h @@ -42,6 +42,11 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2(TFE_Context* ctx); // Return a tensor handle containing a 3x2 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle3X2(TFE_Context* ctx); +// Return a variable handle referring to a variable with the given initial value +// on the given device. +TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value, + const tensorflow::string& device_name = ""); + // Return an add op multiplying `a` by `b`. TFE_Op* AddOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b); diff --git a/tensorflow/c/eager/c_api_unified_experimental.cc b/tensorflow/c/eager/c_api_unified_experimental.cc index 68afffb28b4..e5030a602b3 100644 --- a/tensorflow/c/eager/c_api_unified_experimental.cc +++ b/tensorflow/c/eager/c_api_unified_experimental.cc @@ -17,6 +17,8 @@ limitations under the License. 
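The relocated TestVariable helper declared above now takes an optional device name, which the remote tests in this change rely on; a small sketch of both call forms, assuming a live TFE_Context* ctx (the device string mirrors the ones used in the tests):

  // Default placement, as in the updated CAPI.Variables test.
  TFE_TensorHandle* local_var = TestVariable(ctx, 12.0);
  // Explicit placement on a remote task, as in the resource-access test.
  TFE_TensorHandle* remote_var =
      TestVariable(ctx, 2.0, "/job:localhost/replica:0/task:1/device:CPU:0");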
#include +#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_cat.h" #include "tensorflow/c/eager/c_api_unified_experimental_internal.h" #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" @@ -26,6 +28,51 @@ using tensorflow::string; using tensorflow::internal::OutputList; using tensorflow::internal::unwrap; +namespace tensorflow { +namespace internal { +typedef absl::flat_hash_map FactoriesMap; + +static FactoriesMap& GetFactories() { + static FactoriesMap* factories = new FactoriesMap; + return *factories; +} + +static const char* default_factory = ""; + +void RegisterTracingEngineFactory(const string& name, FactoryFunction factory) { + assert((!GetFactories().count(name)) || + (GetFactories()[name] == factory) && + "Duplicate tracing factory registration"); + GetFactories()[name] = factory; +} + +void SetDefaultTracingEngine(const char* name) { default_factory = name; } + +static ExecutionContext* CreateTracingExecutionContext(const char* fn_name, + TF_Status* s) { + auto entry = GetFactories().find(default_factory); + if (entry != GetFactories().end()) return entry->second(fn_name, s); + string msg = absl::StrCat( + "No tracing engine factory has been registered with the key '", + default_factory, "' (available: "); + // Ensure deterministic (sorted) order in the error message + std::set factories_sorted; + for (const auto& factory : GetFactories()) + factories_sorted.insert(factory.first); + const char* comma = ""; + for (const string& factory : factories_sorted) { + msg += comma + factory; + comma = ", "; + } + msg += ")"; + + TF_SetStatus(s, TF_INVALID_ARGUMENT, msg.c_str()); + return nullptr; +} + +} // end namespace internal +} // end namespace tensorflow + // ============================================================================= // Public C API entry points // @@ -36,6 +83,28 @@ using tensorflow::internal::unwrap; // // ============================================================================= +void TF_SetTracingImplementation(const char* name) { + tensorflow::internal::SetDefaultTracingEngine(name); +} + +// Creates a new TensorFlow function, it is an execution context attached to a +// given tracing context. 
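For orientation, a condensed sketch of the tracing flow these new entry points enable, mirroring the updated TestBasicGraph test further down; error checking and cleanup are omitted, and s is assumed to be a valid TF_Status*:

  TF_SetTracingImplementation("graphdef");
  TF_ExecutionContext* fn_ctx = TF_CreateFunction("my_fn", s);
  TF_AbstractTensor* arg = TF_AddFunctionParameter(fn_ctx, TF_FLOAT, s);
  TF_AbstractOp* add = TF_NewAbstractOp(fn_ctx);
  TF_AbstractOpSetOpType(add, "Add", s);
  TF_AbstractOpSetOpName(add, "my_add", s);
  TF_AbstractTensor* inputs[2] = {arg, arg};
  TF_OutputList* outs = TF_NewOutputList();
  TF_ExecuteOperation(add, 2, inputs, outs, fn_ctx, s);
  TF_DeleteAbstractOp(add);
  // Finalize consumes fn_ctx and yields a function that an eager execution
  // context can register and then call by name.
  TF_AbstractFunction* fn = TF_FinalizeFunction(fn_ctx, outs, s);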
+TF_ExecutionContext* TF_CreateFunction(const char* fn_name, TF_Status* s) { + return wrap(tensorflow::internal::CreateTracingExecutionContext(fn_name, s)); +} + +TF_AbstractFunction* TF_FinalizeFunction(TF_ExecutionContext* ctx, + TF_OutputList* outputs, TF_Status* s) { + auto* func = wrap(unwrap(ctx)->Finalize(unwrap(outputs), s)); + TF_DeleteExecutionContext(ctx); + return func; +} + +TF_AbstractTensor* TF_AddFunctionParameter(TF_ExecutionContext* func, + TF_DataType dtype, TF_Status* s) { + return wrap(unwrap(func)->AddParameter(dtype, s)); +} + void TF_DeleteExecutionContext(TF_ExecutionContext* c) { delete unwrap(c); } TF_AbstractOp* TF_NewAbstractOp(TF_ExecutionContext* c) { @@ -58,6 +127,10 @@ int TF_OutputListNumOutputs(TF_OutputList* o) { TF_AbstractTensor* TF_OutputListGet(TF_OutputList* o, int i) { return wrap(unwrap(o)->outputs[i]); } +void TF_OutputListPushBack(TF_OutputList* o, TF_AbstractTensor* tensor, + TF_Status* s) { + unwrap(o)->outputs.push_back(unwrap(tensor)); +} void TF_AbstractOpSetOpType(TF_AbstractOp* op, const char* const op_type, TF_Status* s) { diff --git a/tensorflow/c/eager/c_api_unified_experimental.h b/tensorflow/c/eager/c_api_unified_experimental.h index be8fc64c2e1..86c59a7f625 100644 --- a/tensorflow/c/eager/c_api_unified_experimental.h +++ b/tensorflow/c/eager/c_api_unified_experimental.h @@ -49,15 +49,26 @@ typedef struct TF_AbstractOp TF_AbstractOp; // setting functional attributes of other composite ops e.g. control flow. typedef struct TF_AbstractFunction TF_AbstractFunction; -// Creates a context for tracing the execution of operations into a function. -TF_ExecutionContext* TF_NewGraphExecutionContext(TF_Status* s); +// This allows the client to swap the implementation of the tracing engine. +// Any future call to TF_CreateFunction will use the implementation defined +// here. +void TF_SetTracingImplementation(const char* name); + +// Creates a new TensorFlow function. A Function is an execution context, and as +// such it can trace operations through TF_ExecuteOperation. After completing +// tracing, a function can be obtained by TF_FinalizeFunction. +TF_ExecutionContext* TF_CreateFunction(const char* fn_name, TF_Status* status); // Creates a context for eager execution of operations. TF_ExecutionContext* TF_NewEagerExecutionContext(TFE_ContextOptions*, TF_Status* s); - void TF_DeleteExecutionContext(TF_ExecutionContext*); +// Add a new parameter to a TensorFlow Function. +// TODO(aminim): what about shape? +TF_AbstractTensor* TF_AddFunctionParameter(TF_ExecutionContext* func, + TF_DataType dtype, TF_Status* s); + // Create an operation suitable to use with the provided context. The operation // requires its type (e.g. "AddV2") to be set independently. TF_AbstractOp* TF_NewAbstractOp(TF_ExecutionContext* ctx); @@ -77,19 +88,21 @@ void TF_AbstractOpSetAttrType(TF_AbstractOp* op, const char* const attr_name, void TF_DeleteAbstractTensor(TF_AbstractTensor*); // TF_OutputList holds the list of TF_AbstractTensor that results from executing -// an operation. -// It just lets us not specify the number of outputs of an operation -// beforehand. This forces a memory allocation in the runtime, which is bad, but -// it allows for generic code. -// TODO(aminim): the description above isn't clear with respect to -// TF_OutputListNumOutputs and the current eager implementation which requires -// the number of outputs to be set by the client. +// an operation, or provided to create a function. 
+// When executing an operation in an eager context, the expected number of +// outputs must be set beforehand with `TF_OutputListSetNumOutputs`. typedef struct TF_OutputList TF_OutputList; TF_OutputList* TF_NewOutputList(); void TF_DeleteOutputList(TF_OutputList* o); -void TF_OutputListSetNumOutputs(TF_OutputList* o, int, TF_Status*); +// Prepare tracing to the expected number of output for an operation. +void TF_OutputListSetNumOutputs(TF_OutputList* o, int num_outputs, TF_Status*); +// Return the number of outputs in the list. int TF_OutputListNumOutputs(TF_OutputList* o); +// Return the `i`th output in the list. TF_AbstractTensor* TF_OutputListGet(TF_OutputList* o, int i); +// Append a tensor at the end of the output list, growing its size by one. +void TF_OutputListPushBack(TF_OutputList* o, TF_AbstractTensor* tensor, + TF_Status*); // TF_ExecuteOperation will, if in eager mode, execute, if in graph mode, maybe // capture some inputs and then add a node in the graph. The output tensors are @@ -100,13 +113,12 @@ void TF_ExecuteOperation(TF_AbstractOp* op, int num_inputs, TF_ExecutionContext* ctx, TF_Status* s); // Creates a new TF_AbstractFunction from the current tracing states in the -// context. The returned TF_GraphToFunction must be deleted by the client. +// context. The provided `ctx` is consumed by this API call and deleted. +// The returned TF_AbstractFunction must be deleted by the client, // TODO(aminim): clarify the contract on the state of the context after this // call. -TF_AbstractFunction* TF_ExecutionContextToFunction( - const TF_ExecutionContext* fn_body, const char* fn_name, int num_inputs, - const TF_AbstractTensor* inputs, int num_outputs, - const TF_AbstractTensor* outputs, TF_Status* status); +TF_AbstractFunction* TF_FinalizeFunction(TF_ExecutionContext* ctx, + TF_OutputList*, TF_Status*); void TF_DeleteAbstractFunction(TF_AbstractFunction*); diff --git a/tensorflow/c/eager/c_api_unified_experimental_eager.cc b/tensorflow/c/eager/c_api_unified_experimental_eager.cc index 820c61445fb..cf8cf845834 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_eager.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_eager.cc @@ -123,6 +123,17 @@ class EagerContext : public ExecutionContext { } } + AbstractTensor* AddParameter(TF_DataType dtype, TF_Status* s) override { + TF_SetStatus(s, TF_INVALID_ARGUMENT, + "Can't add function parameter on an eager context."); + return nullptr; + } + AbstractFunction* Finalize(OutputList* outputs, TF_Status* s) override { + TF_SetStatus(s, TF_INVALID_ARGUMENT, + "Can't use finalize function on an eager context."); + return nullptr; + } + void RegisterFunction(AbstractFunction* afunc, TF_Status* s) override { auto* func = afunc->GetTfFunction(s); if (!func) { diff --git a/tensorflow/c/eager/c_api_unified_experimental_graph.cc b/tensorflow/c/eager/c_api_unified_experimental_graph.cc index 36f8353894b..dd5a95b3526 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_graph.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_graph.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include "absl/strings/str_cat.h" #include "tensorflow/c/c_api.h" #include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/c/eager/c_api_unified_experimental.h" @@ -114,12 +115,14 @@ struct GraphFunction : public AbstractFunction { static constexpr AbstractFunctionKind kKind = kGraphFunc; }; -// GraphContext wraps a TF_Graph and manages the "execution" of operation, i.e. -// adding them to the graph. 
+// GraphContext wraps a TF_Graph modeling a single function and manages the +// "execution" of operation, i.e. adding them to the function. class GraphContext : public ExecutionContext { public: - GraphContext() - : ExecutionContext(kKind), graph_(new TF_Graph(), TF_DeleteGraph) {} + explicit GraphContext(const char* name) + : ExecutionContext(kKind), + graph_(new TF_Graph(), TF_DeleteGraph), + name_(name) {} AbstractOp* CreateOperation() override { // TODO(srbs): Should the lifetime of this op be tied to the context. @@ -136,6 +139,10 @@ class GraphContext : public ExecutionContext { return; } auto* tf_opdesc = graph_op->op_.release(); + if (tf_opdesc == nullptr) { + TF_SetStatus(s, TF_INVALID_ARGUMENT, "AbstractOp is incomplete."); + return; + } for (int i = 0; i < num_inputs; ++i) { auto* graph_tensor = dyncast(inputs[i]); if (!graph_tensor) { @@ -164,24 +171,38 @@ class GraphContext : public ExecutionContext { } } - TF_Function* ToFunction(const char* fn_name, int num_inputs, - const GraphTensor* inputs, int num_outputs, - const GraphTensor* outputs, TF_Status* status) const { - std::vector graph_inputs; - graph_inputs.resize(num_inputs); + AbstractTensor* AddParameter(TF_DataType dtype, TF_Status* s) override { + TF_OperationDescription* opdesc = + TF_NewOperation(graph_.get(), "Placeholder", + absl::StrCat("_input_", inputs_.size()).c_str()); + TF_SetAttrType(opdesc, "dtype", dtype); + auto* operation = TF_FinishOperation(opdesc, s); + if (!s->status.ok()) return nullptr; + + inputs_.push_back(TF_Output{operation, 0}); + return new GraphTensor(inputs_.back(), this); + } + + AbstractFunction* Finalize(OutputList* outputs, TF_Status* s) override { + std::unique_ptr func(new GraphFunction); std::vector graph_outputs; - graph_outputs.resize(num_outputs); - for (int i = 0; i < num_inputs; i++) { - graph_inputs[i] = inputs[i].output; - } - for (int i = 0; i < num_outputs; i++) { - graph_outputs[i] = outputs[i].output; + graph_outputs.reserve(outputs->outputs.size()); + for (AbstractTensor* abstract_output : outputs->outputs) { + GraphTensor* output = dyncast(abstract_output); + if (!output) { + TF_SetStatus(s, TF_UNIMPLEMENTED, + "Returning a non-graph tensor from a function has not " + "been implemented yet."); + return nullptr; + } + graph_outputs.push_back(output->output); } - return TF_GraphToFunction(graph_.get(), fn_name, 0, -1, nullptr, - graph_inputs.size(), graph_inputs.data(), - graph_outputs.size(), graph_outputs.data(), - nullptr, nullptr, fn_name, status); + func->func = TF_GraphToFunction( + graph_.get(), name_, 0, -1, nullptr, inputs_.size(), inputs_.data(), + graph_outputs.size(), graph_outputs.data(), nullptr, nullptr, name_, s); + if (TF_GetCode(s) != TF_OK) return nullptr; + return func.release(); } void RegisterFunction(AbstractFunction* func, TF_Status* s) override { @@ -195,54 +216,20 @@ class GraphContext : public ExecutionContext { private: std::unique_ptr graph_; + std::vector inputs_; + const char* name_; }; -// Helper that converts the graph currently held in the context into a function. 
-static AbstractFunction* ExecutionContextToFunction( - const ExecutionContext* fn_body, const char* fn_name, int num_inputs, - const AbstractTensor* inputs, int num_outputs, - const AbstractTensor* outputs, TF_Status* status) { - auto* graph_ctx = dyncast(fn_body); - if (graph_ctx == nullptr) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, - "fn_body is not a TF_GraphContext."); - return nullptr; - } - auto* graph_inputs = dyncast(inputs); - if (!graph_inputs) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, "inputs aren't GraphTensors."); - return nullptr; - } - auto* graph_outputs = dyncast(outputs); - if (!graph_outputs) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, "outputs aren't GraphTensors."); - return nullptr; - } - GraphFunction* func = new GraphFunction; - func->func = graph_ctx->ToFunction(fn_name, num_inputs, graph_inputs, - num_outputs, graph_outputs, status); - return func; +static ExecutionContext* GraphTracingFactory(const char* name, TF_Status* s) { + return new GraphContext(name); } +// Register the tracing implemented in this file as the default tracing engine. +static bool register_tracing = [] { + RegisterTracingEngineFactory("graphdef", GraphTracingFactory); + SetDefaultTracingEngine("graphdef"); + return true; +}(); + } // namespace internal } // namespace tensorflow - -// ============================================================================= -// Public C API entry points -// These are only the entry points specific to the Graph API. -// ============================================================================= - -using tensorflow::internal::unwrap; - -TF_ExecutionContext* TF_NewGraphExecutionContext(TF_Status* s) { - return wrap(new tensorflow::internal::GraphContext()); -} - -TF_AbstractFunction* TF_ExecutionContextToFunction( - const TF_ExecutionContext* fn_body, const char* fn_name, int num_inputs, - const TF_AbstractTensor* inputs, int num_outputs, - const TF_AbstractTensor* outputs, TF_Status* status) { - return wrap(ExecutionContextToFunction(unwrap(fn_body), fn_name, num_inputs, - unwrap(inputs), num_outputs, - unwrap(outputs), status)); -} diff --git a/tensorflow/c/eager/c_api_unified_experimental_internal.h b/tensorflow/c/eager/c_api_unified_experimental_internal.h index ab085a20ff0..49212a230ee 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_internal.h +++ b/tensorflow/c/eager/c_api_unified_experimental_internal.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" #include "tensorflow/core/platform/casts.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace internal { @@ -148,6 +149,17 @@ struct ExecutionContext { // Creates an empty AbstractOperation suitable to use with this context. virtual AbstractOp* CreateOperation() = 0; + // Add a function parameter and return the corresponding tensor. + // This is only valid with an ExecutionContext obtained from a TracingContext, + // it'll always error out with an eager context. + virtual AbstractTensor* AddParameter(TF_DataType dtype, TF_Status* s) = 0; + + // Finalize this context and make a function out of it. The context is in a + // invalid state after this call and must be destroyed. + // This is only valid with an ExecutionContext obtained from a TracingContext, + // it'll always error out with an eager context. 
+ virtual AbstractFunction* Finalize(OutputList* outputs, TF_Status* s) = 0; + // Registers a functions with this context, after this the function is // available to be called/referenced by its name in this context. virtual void RegisterFunction(AbstractFunction* func, TF_Status* s) = 0; @@ -156,6 +168,11 @@ struct ExecutionContext { const ExecutionContextKind k; }; +typedef ExecutionContext* (*FactoryFunction)(const char* fn_name, TF_Status*); +void SetDefaultTracingEngine(const char* name); +void RegisterTracingEngineFactory(const ::tensorflow::string& name, + FactoryFunction factory); + // Create utilities to wrap/unwrap: this convert from the C opaque types to the // C++ implementation, and back. #define MAKE_WRAP_UNWRAP(C_TYPEDEF, CPP_CLASS) \ diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 170b82333d8..9776b4d13ed 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -29,7 +29,12 @@ using tensorflow::string; namespace tensorflow { namespace { -TEST(UnifedCAPI, TestBasicEager) { +class UnifiedCAPI : public ::testing::TestWithParam { + protected: + void SetUp() override { TF_SetTracingImplementation(GetParam()); } +}; + +TEST_P(UnifiedCAPI, TestBasicEager) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -81,33 +86,18 @@ TEST(UnifedCAPI, TestBasicEager) { TF_DeleteExecutionContext(ctx); } -TEST(UnifedCAPI, TestBasicGraph) { +TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); + // Start a new function / execution context. + string fn_name = "double"; + TF_ExecutionContext* graph_ctx = + TF_CreateFunction(fn_name.c_str(), status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - // Add a placeholder to the graph. - auto* placeholder_op = TF_NewAbstractOp(graph_ctx); - TF_AbstractOpSetOpType(placeholder_op, "Placeholder", status.get()); + auto* placeholder_t = + TF_AddFunctionParameter(graph_ctx, TF_FLOAT, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_AbstractOpSetOpName(placeholder_op, "my_ph", status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_AbstractOpSetAttrType(placeholder_op, "dtype", TF_FLOAT, status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - - // Build inputs and outputs. - TF_OutputList* placeholder_outputs = TF_NewOutputList(); - - // Execute. - TF_ExecuteOperation(placeholder_op, 0, nullptr, placeholder_outputs, - graph_ctx, status.get()); - ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - ASSERT_EQ(1, TF_OutputListNumOutputs(placeholder_outputs)); - TF_AbstractTensor* placeholder_t = TF_OutputListGet(placeholder_outputs, 0); - - // Delete placeholder op. - TF_DeleteAbstractOp(placeholder_op); // Build an abstract operation. auto* add_op = TF_NewAbstractOp(graph_ctx); @@ -123,16 +113,13 @@ TEST(UnifedCAPI, TestBasicGraph) { // Execute. TF_ExecuteOperation(add_op, 2, inputs, add_outputs, graph_ctx, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_AbstractTensor* output_t = TF_OutputListGet(add_outputs, 0); // Clean up operation and inputs. 
TF_DeleteAbstractOp(add_op); - string fn_name = "double"; - TF_AbstractFunction* func = TF_ExecutionContextToFunction( - graph_ctx, fn_name.c_str(), 1, placeholder_t, 1, output_t, status.get()); - TF_DeleteAbstractTensor(placeholder_t); - TF_DeleteAbstractTensor(output_t); + TF_AbstractFunction* func = + TF_FinalizeFunction(graph_ctx, add_outputs, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build eager context. TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -173,18 +160,161 @@ TEST(UnifedCAPI, TestBasicGraph) { ASSERT_EQ(*f_value, 4.0); TF_DeleteOutputList(add_outputs); - TF_DeleteOutputList(placeholder_outputs); TF_DeleteAbstractOp(fn_op); TF_DeleteAbstractTensor(input_t); TF_DeleteAbstractTensor(final_result); TF_DeleteTensor(f_t); TF_DeleteAbstractFunction(func); - TF_DeleteExecutionContext(graph_ctx); TF_DeleteExecutionContext(eager_execution_ctx); } -TEST(UnifedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { +TEST_P(UnifiedCAPI, TestMultiOutputGraph) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_Status* s = status.get(); + + // Start a new function / execution context. + string fn_name = "two_adds"; + TF_ExecutionContext* graph_ctx = TF_CreateFunction(fn_name.c_str(), s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + auto* arg0 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + auto* arg1 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Create a first "Add" computing `arg0 + arg1`. + TF_AbstractTensor* add_output1; + { + // Build an abstract operation, inputs and output. + auto* add_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(add_op, "Add", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractOpSetOpName(add_op, "my_add1", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractTensor* inputs[2] = {arg0, arg1}; + TF_OutputList* add_outputs = TF_NewOutputList(); + // Trace the operation now (create a node in the graph). + TF_ExecuteOperation(add_op, 2, inputs, add_outputs, graph_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(add_op); + // Extract the resulting tensor. + add_output1 = TF_OutputListGet(add_outputs, 0); + TF_DeleteOutputList(add_outputs); + } + + // Same with a second "Add" computing `arg1 + arg1`. + TF_AbstractTensor* add_output2; + { + // Build an abstract operation, inputs and output. + auto* add_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(add_op, "Add", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractOpSetOpName(add_op, "my_add2", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractTensor* inputs[2] = {arg1, arg1}; + TF_OutputList* add_outputs = TF_NewOutputList(); + // Trace the operation now (create a node in the graph). + TF_ExecuteOperation(add_op, 2, inputs, add_outputs, graph_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(add_op); + // Extract the resulting tensor. + add_output2 = TF_OutputListGet(add_outputs, 0); + TF_DeleteOutputList(add_outputs); + } + + // Finalize the function by providing the returned values. + TF_AbstractFunction* func; + { + // We want to return the output of both add operations, create a new list + // and populate it. 
+ TF_OutputList* func_outputs = TF_NewOutputList(); + TF_OutputListPushBack(func_outputs, add_output1, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_OutputListPushBack(func_outputs, add_output2, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + func = TF_FinalizeFunction(graph_ctx, func_outputs, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteOutputList(func_outputs); + } + + /** + * We traced so far this function: + * + * def two_adds(a, b): + * my_add1 = a + b + * my_add2 = b + b + * return my_add1, my_add2 + * + * Now we will execute this function with an eager context: + * + * output1, output2 = two_adds(2.0, 3.0) + * + * and check that we got 5.0 and 6.0 as results. + */ + + // Build eager context. + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* eager_execution_ctx = + TF_NewEagerExecutionContext(opts, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TFE_DeleteContextOptions(opts); + + TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Build the abstract op to run the function. + TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); + TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Build two abstract input tensors as function arguments. + std::vector func_args; + { + TFE_Context* eager_ctx = + TF_ExecutionContextGetTFEContext(eager_execution_ctx); + TFE_TensorHandle* input_eager = TestScalarTensorHandle(eager_ctx, 2.0f); + func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + input_eager = TestScalarTensorHandle(eager_ctx, 3.0f); + func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + } + + TF_OutputList* func_outputs = TF_NewOutputList(); + TF_OutputListSetNumOutputs(func_outputs, 2, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_ExecuteOperation(fn_op, func_args.size(), func_args.data(), func_outputs, + eager_execution_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(fn_op); + for (TF_AbstractTensor* t : func_args) TF_DeleteAbstractTensor(t); + + ASSERT_EQ(2, TF_OutputListNumOutputs(func_outputs)); + float results[2]; + for (int idx = 0; idx < 2; ++idx) { + TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx); + TFE_TensorHandle* handle = TF_AbstractTensorGetEagerTensor(result, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_Tensor* f_t = TFE_TensorHandleResolve(handle, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + results[idx] = *static_cast(TF_TensorData(f_t)); + TF_DeleteTensor(f_t); + } + ASSERT_EQ(results[0], 5.0); + ASSERT_EQ(results[1], 6.0); + + for (int idx = 0; idx < 2; ++idx) { + TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx); + TF_DeleteAbstractTensor(result); + } + TF_DeleteOutputList(func_outputs); + TF_DeleteExecutionContext(eager_execution_ctx); + TF_DeleteAbstractFunction(func); +} + +TEST(UnifiedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -192,18 +322,15 @@ TEST(UnifedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TFE_DeleteContextOptions(opts); - TF_AbstractFunction* func = 
TF_ExecutionContextToFunction( - ctx, nullptr, 0, nullptr, 0, nullptr, status.get()); + TF_AbstractFunction* func = TF_FinalizeFunction(ctx, nullptr, status.get()); ASSERT_EQ(nullptr, func); ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(status.get())); - - TF_DeleteExecutionContext(ctx); } -TEST(UnifedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { +TEST_P(UnifiedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); + TF_ExecutionContext* graph_ctx = TF_CreateFunction("some_func", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Add a placeholder to the graph. @@ -221,10 +348,10 @@ TEST(UnifedCAPI, TF_CallingSetOpTypeAfterFinishingOpBuildingRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { +TEST_P(UnifiedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); + TF_ExecutionContext* graph_ctx = TF_CreateFunction("some_func", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Add a placeholder to the graph. @@ -242,7 +369,7 @@ TEST(UnifedCAPI, TF_CallingSetOpNameAfterFinishingOpBuildingRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TestExecutingEagerOpInGraphModeRaises) { +TEST_P(UnifiedCAPI, TestExecutingEagerOpInGraphModeRaises) { // Build an Eager context. std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -272,7 +399,8 @@ TEST(UnifedCAPI, TestExecutingEagerOpInGraphModeRaises) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build a Graph context. - TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_ExecutionContext* graph_ctx = TF_CreateFunction("some_func", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Execute eager op using graph context. @@ -288,10 +416,11 @@ TEST(UnifedCAPI, TestExecutingEagerOpInGraphModeRaises) { TF_DeleteExecutionContext(graph_ctx); } -TEST(UnifedCAPI, TestExecutingGraphOpInEagerModeRaises) { +TEST_P(UnifiedCAPI, TestExecutingGraphOpInEagerModeRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - TF_ExecutionContext* graph_ctx = TF_NewGraphExecutionContext(status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_ExecutionContext* graph_ctx = TF_CreateFunction("some_func", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Add a placeholder to the graph. 
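// ---------------------------------------------------------------------------
// The parameterized tests above all follow the same tracing workflow. A
// minimal sketch of that sequence, using only entry points exercised in this
// file (status checks and intermediate tensor cleanup are omitted, and names
// such as "my_fn" / "my_add" are illustrative, so treat this as a sketch
// rather than a drop-in snippet):

TF_Status* s = TF_NewStatus();
// Select the tracing engine registered by this change.
TF_SetTracingImplementation("graphdef");

// Start tracing a function and declare its parameters.
TF_ExecutionContext* fn_ctx = TF_CreateFunction("my_fn", s);
TF_AbstractTensor* arg = TF_AddFunctionParameter(fn_ctx, TF_FLOAT, s);

// Trace one "Add" node that consumes the parameter twice.
TF_AbstractOp* add = TF_NewAbstractOp(fn_ctx);
TF_AbstractOpSetOpType(add, "Add", s);
TF_AbstractOpSetOpName(add, "my_add", s);
TF_AbstractTensor* inputs[2] = {arg, arg};
TF_OutputList* outputs = TF_NewOutputList();
TF_ExecuteOperation(add, 2, inputs, outputs, fn_ctx, s);
TF_DeleteAbstractOp(add);

// Finalizing consumes the tracing context and yields a function that can be
// registered on an eager context with TF_ExecutionContextRegisterFunction and
// then executed by name like any other op.
TF_AbstractFunction* fn = TF_FinalizeFunction(fn_ctx, outputs, s);
TF_DeleteOutputList(outputs);
TF_DeleteAbstractFunction(fn);
TF_DeleteStatus(s);
// ---------------------------------------------------------------------------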
@@ -348,5 +477,7 @@ TEST(UnifedCAPI, TestExecutingGraphOpInEagerModeRaises) { TF_DeleteExecutionContext(eager_execution_ctx); } +INSTANTIATE_TEST_SUITE_P(Tracing, UnifiedCAPI, ::testing::Values("graphdef")); + } // namespace } // namespace tensorflow diff --git a/tensorflow/c/eager/context_interface.h b/tensorflow/c/eager/context_interface.h index be0aad31a35..d21ab45e579 100644 --- a/tensorflow/c/eager/context_interface.h +++ b/tensorflow/c/eager/context_interface.h @@ -59,6 +59,20 @@ class AbstractContextInterface { virtual AbstractTensorInterface* CreateTensor( DataType dtype, absl::Span dim_sizes) = 0; + typedef void (*MemoryReleaser)(void* data, size_t len, void* arg); + + // Create a tensor instance from the given data buffer and description. + // `memory_releaser` will be called on destruction, and it's responsible for + // cleaning up the underlying buffer. `convert_string` indicates whether it + // has to handle tstring conversion. Expected to be removed once tstring + // migration is done. + virtual AbstractTensorInterface* CreateTensor(DataType dtype, + const int64_t* dims, + int num_dims, void* data, + size_t len, bool convert_string, + MemoryReleaser memory_releaser, + void* memory_releaser_arg) = 0; + // Create a handle to wrap and manage a Tensor virtual AbstractTensorHandleInterface* CreateLocalHandle( AbstractTensorInterface* t) = 0; @@ -81,6 +95,12 @@ class AbstractContextInterface { virtual void ClearCachesAndThreadExecutors() = 0; + // Initialize the step resource container for a training step. This is used + // in current TF runtime. For tfrt, it is used by fallback op handler. + virtual void StartStep() = 0; + // Destroy the step resource container for a training step. + virtual void EndStep() = 0; + protected: virtual ~AbstractContextInterface() {} }; diff --git a/tensorflow/c/eager/parallel_device/BUILD b/tensorflow/c/eager/parallel_device/BUILD index f4dbcc6cead..3b2640e14d1 100644 --- a/tensorflow/c/eager/parallel_device/BUILD +++ b/tensorflow/c/eager/parallel_device/BUILD @@ -27,6 +27,7 @@ cc_library( name = "parallel_device", srcs = [":sources"], hdrs = [":headers"], + visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/c:c_api", "//tensorflow/c/eager:c_api", @@ -43,6 +44,7 @@ tf_cc_test( srcs = ["parallel_device_test.cc"], deps = [ ":parallel_device", + ":parallel_device_ops", "//tensorflow/c:c_api", "//tensorflow/c:c_api_experimental", "//tensorflow/c/eager:c_api", @@ -52,3 +54,19 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +# Note: ParallelDevice-specific ops are experimental and not currently linked in +# to TensorFlow by default, just used in a few tests. +filegroup( + name = "parallel_device_ops_srcs", + srcs = ["parallel_device_ops.cc"], + visibility = ["//tensorflow/python/distribute/parallel_device:__pkg__"], +) + +cc_library( + name = "parallel_device_ops", + srcs = [":parallel_device_ops_srcs"], + visibility = ["//tensorflow:internal"], + deps = ["//tensorflow/core:framework"], + alwayslink = 1, +) diff --git a/tensorflow/c/eager/parallel_device/parallel_device.cc b/tensorflow/c/eager/parallel_device/parallel_device.cc index e6846809fcf..27c2699c4c2 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device.cc @@ -92,6 +92,10 @@ class ParallelDevice { TFE_TensorHandle* tensor, TF_Status* status) const; + // A parallel tensor with scalar integers numbering component devices. 
+ std::unique_ptr DeviceIDs(TFE_Context* context, + TF_Status* status) const; + // Takes a description of a single operation being executed on the // ParallelDevice, and in turn runs one operation per component device with // its corresponding inputs from the input ParallelTensors (or @@ -208,6 +212,46 @@ std::unique_ptr ParallelDevice::CopyToParallelDevice( status); } +std::unique_ptr ParallelDevice::DeviceIDs( + TFE_Context* context, TF_Status* status) const { + // TODO(allenl): We could cache DeviceIDs (keyed by context). + std::vector components; + components.reserve(underlying_devices_.size()); + for (int device_index = 0; device_index < underlying_devices_.size(); + ++device_index) { + int64_t* device_id = new int64_t; + *device_id = device_index; + std::unique_ptr tensor( + TF_NewTensor( + TF_INT64, /*dims=*/nullptr, /*num_dims=*/0, device_id, + sizeof(int64_t), + [](void* data, size_t, void* arg) { + delete reinterpret_cast(data); + }, + nullptr), + TF_DeleteTensor); + // TODO(allenl): Here and when executing regular operations, we could hold + // on to one TFE_Op per device and just call TFE_ResetOp to avoid parsing + // device names repeatedly. + OpPtr const_op(TFE_NewOp(context, "Const", status)); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetDevice(const_op.get(), underlying_devices_[device_index].c_str(), + status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrTensor(const_op.get(), "value", tensor.get(), status); + if (TF_GetCode(status) != TF_OK) return nullptr; + TFE_OpSetAttrType(const_op.get(), "dtype", TF_INT64); + TFE_TensorHandle* device_handle; + int num_outputs = 1; + TFE_Execute(const_op.get(), &device_handle, &num_outputs, status); + if (TF_GetCode(status) != TF_OK) return nullptr; + components.emplace_back(device_handle); + if (TF_GetCode(status) != TF_OK) return nullptr; + } + return ParallelTensor::FromTensorHandles(*this, std::move(components), + status); +} + absl::optional> ParallelDevice::Execute( TFE_Context* context, std::vector inputs, const char* operation_name, const TFE_OpAttrs* attributes, @@ -282,6 +326,13 @@ absl::optional> ParallelDevice::Execute( } result.emplace(std::move(outputs)); return result; + } else if (operation_name == std::string("DeviceID")) { + std::vector result_content; + result_content.reserve(1); + result_content.push_back(DeviceIDs(context, status)); + if (TF_GetCode(status) != TF_OK) return result; + result.emplace(std::move(result_content)); + return result; } absl::optional>> maybe_parallel_results( diff --git a/tensorflow/c/eager/parallel_device/parallel_device_ops.cc b/tensorflow/c/eager/parallel_device/parallel_device_ops.cc new file mode 100644 index 00000000000..1decffca047 --- /dev/null +++ b/tensorflow/c/eager/parallel_device/parallel_device_ops.cc @@ -0,0 +1,26 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +// TODO(allenl): Figure out if we need this op, and if so whether we should move +// it to core TF. Right now the eager C API does some checking of op +// registrations before calling into custom devices, but we may be able to avoid +// that. +REGISTER_OP("DeviceID") + .Output("device_id: int64") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); diff --git a/tensorflow/c/eager/parallel_device/parallel_device_test.cc b/tensorflow/c/eager/parallel_device/parallel_device_test.cc index 9b0613b0391..fdc140407df 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_test.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device_test.cc @@ -278,14 +278,15 @@ TensorHandlePtr Multiply(TFE_Context* context, TFE_TensorHandle* first, } // Assert that `handle` is equal to `expected_value`. -void AssertScalarFloatEq(TFE_TensorHandle* handle, float expected_value) { +template +void ExpectScalarEq(TFE_TensorHandle* handle, value_type expected_value) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); std::unique_ptr value_zero( TFE_TensorHandleResolve(handle, status.get()), TF_DeleteTensor); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - ASSERT_EQ(expected_value, - *static_cast(TF_TensorData(value_zero.get()))); + EXPECT_EQ(expected_value, + *static_cast(TF_TensorData(value_zero.get()))); } template @@ -343,8 +344,8 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, ExtractPerDeviceValues(context, read.get(), &components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(components[0].get(), 20.); - AssertScalarFloatEq(components[1].get(), 20.); + ExpectScalarEq(components[0].get(), 20.); + ExpectScalarEq(components[1].get(), 20.); std::string first_device = TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); @@ -373,8 +374,8 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, ExtractPerDeviceValues(context, read.get(), &components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(components[0].get(), 23.); - AssertScalarFloatEq(components[1].get(), 18.); + ExpectScalarEq(components[0].get(), 23.); + ExpectScalarEq(components[1].get(), 18.); std::string first_device = TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); @@ -383,6 +384,32 @@ void BasicTestsForTwoDevices(TFE_Context* context, const char* first_device, TFE_TensorHandleBackingDeviceName(components[1].get(), status.get()); ASSERT_EQ(underlying_devices[1], second_device); } + // Compute the device ID twice and verify the result + for (int i = 0; i < 2; ++i) { + std::unique_ptr op( + TFE_NewOp(context, "DeviceID", status.get()), TFE_DeleteOp); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + TFE_OpSetDevice(op.get(), device_name, status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + + TFE_TensorHandle* result_handle; + int num_retvals = 1; + TFE_Execute(op.get(), &result_handle, &num_retvals, status.get()); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + std::array components; + ExtractPerDeviceValues(context, result_handle, &components, status.get()); + 
TFE_DeleteTensorHandle(result_handle); + ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); + + ExpectScalarEq(components[0].get(), 0); + ExpectScalarEq(components[1].get(), 1); + std::string first_device = + TFE_TensorHandleBackingDeviceName(components[0].get(), status.get()); + ASSERT_EQ(underlying_devices[0], first_device); + std::string second_device = + TFE_TensorHandleBackingDeviceName(components[1].get(), status.get()); + ASSERT_EQ(underlying_devices[1], second_device); + } } TEST(PARALLEL_DEVICE, TestBasicCPU) { @@ -498,8 +525,8 @@ TEST(PARALLEL_DEVICE, TestExplicitCopies) { ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); // The value of the original tensor is replicated on each device. - AssertScalarFloatEq(components[0].get(), 3.); - AssertScalarFloatEq(components[1].get(), 3.); + ExpectScalarEq(components[0].get(), 3.); + ExpectScalarEq(components[1].get(), 3.); // Verify that the mirrors are placed on the component devices. std::string first_device = @@ -630,7 +657,7 @@ TEST(PARALLEL_DEVICE, TestNestedParallelDevices) { &second_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(second_components[1].get(), 9.); + ExpectScalarEq(second_components[1].get(), 9.); // Verify that the mirrors are placed on the component devices. std::string first_device = TFE_TensorHandleBackingDeviceName( @@ -644,8 +671,8 @@ TEST(PARALLEL_DEVICE, TestNestedParallelDevices) { std::array first_components; ExtractPerDeviceValues(context.get(), second_components[0].get(), &first_components, status.get()); - AssertScalarFloatEq(first_components[0].get(), 3.); - AssertScalarFloatEq(first_components[1].get(), 6.); + ExpectScalarEq(first_components[0].get(), 3.); + ExpectScalarEq(first_components[1].get(), 6.); first_device = TFE_TensorHandleBackingDeviceName(first_components[0].get(), status.get()); @@ -806,8 +833,8 @@ TEST(PARALLEL_DEVICE, TestCollective) { ExtractPerDeviceValues(context.get(), reduced.get(), &result_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(result_components[0].get(), 3.); - AssertScalarFloatEq(result_components[1].get(), 3.); + ExpectScalarEq(result_components[0].get(), 3.); + ExpectScalarEq(result_components[1].get(), 3.); } void RegisterCollectiveMulFunction(TFE_Context* context, @@ -909,8 +936,8 @@ TEST(PARALLEL_DEVICE, TestFunction) { ExtractPerDeviceValues(context.get(), reduced.get(), &result_components, status.get()); ASSERT_TRUE(TF_GetCode(status.get()) == TF_OK) << TF_Message(status.get()); - AssertScalarFloatEq(result_components[0].get(), 7. * 9.); - AssertScalarFloatEq(result_components[1].get(), 7. * 9.); + ExpectScalarEq(result_components[0].get(), 7. * 9.); + ExpectScalarEq(result_components[1].get(), 7. * 9.); std::string first_device = TFE_TensorHandleBackingDeviceName( result_components[0].get(), status.get()); diff --git a/tensorflow/c/eager/tfe_op_attrs_internal.h b/tensorflow/c/eager/tfe_op_attrs_internal.h index 935d7d520e5..0287502dea6 100644 --- a/tensorflow/c/eager/tfe_op_attrs_internal.h +++ b/tensorflow/c/eager/tfe_op_attrs_internal.h @@ -15,33 +15,21 @@ limitations under the License. 
#ifndef TENSORFLOW_C_EAGER_TFE_OP_ATTRS_INTERNAL_H_ #define TENSORFLOW_C_EAGER_TFE_OP_ATTRS_INTERNAL_H_ -#include -#include -#include -#include -#include -#include -#include - +#include "tensorflow/c/conversion_macros.h" #include "tensorflow/c/tf_status.h" #include "tensorflow/core/common_runtime/eager/attr_builder.h" #include "tensorflow/core/framework/attr_value.pb.h" // An equivalent of a tensorflow::NameAttrList protocol buffer, but used in ways // that sometimes do not require serialization. +typedef struct TFE_OpAttrs TFE_OpAttrs; + typedef struct TFE_Context TFE_Context; typedef struct TFE_Op TFE_Op; -struct TFE_OpAttrs { - explicit TFE_OpAttrs() : attributes(nullptr) {} - - explicit TFE_OpAttrs(const tensorflow::AttrBuilder* value) - : attributes(value) {} - - const tensorflow::AttrBuilder* attributes; -}; - namespace tensorflow { +DEFINE_CONVERSION_FUNCTIONS(tensorflow::AttrBuilder, TFE_OpAttrs); + // Set an AttrValue on the op. Doesn't handle the list types. void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, const tensorflow::AttrValue& default_value, diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc index 53e247cd038..8ee47da01dd 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc @@ -85,17 +85,36 @@ class ModularFileSystemTest : public ::testing::TestWithParam { const std::string test_name = tensorflow::str_util::StringReplace( ::testing::UnitTest::GetInstance()->current_test_info()->name(), "/", "_", /*replace_all=*/true); - root_dir_ = tensorflow::io::JoinPath( - ::testing::TempDir(), - tensorflow::strings::StrCat("tf_fs_", rng_val_, "_", test_name)); + if (!cloud_path_.empty()) { + // We have to join path for non-local filesystem manually to make sure + // that this test will run on Windows since `tensorflow::io::JoinPath` + // behaves differently on Windows. `tmp_dir` should be something like + // `path/to/tmp/dir/`. After joining path, we will have + // /path/to/tmp/dir/tf_fs_rng_name/` + root_dir_ = tensorflow::strings::StrCat( + "/", tmp_dir_, + tensorflow::strings::StrCat("tf_fs_", rng_val_, "_", test_name), "/"); + } else { + root_dir_ = tensorflow::io::JoinPath( + tmp_dir_, + tensorflow::strings::StrCat("tf_fs_", rng_val_, "_", test_name)); + } + if (!GetParam().empty()) { + root_dir_ = tensorflow::strings::StrCat(GetParam(), "://", cloud_path_, + root_dir_); + } env_ = Env::Default(); } void SetUp() override { - if (mkdir(root_dir_.c_str(), 0755) != 0) { - int error_code = errno; - GTEST_SKIP() << "Cannot create working directory: " - << tensorflow::IOError(root_dir_, error_code); + FileSystem* fs = nullptr; + Status s = env_->GetFileSystemForFile(root_dir_, &fs); + if (fs == nullptr || !s.ok()) + GTEST_SKIP() << "No filesystem registered: " << s; + + s = fs->CreateDir(root_dir_); + if (!s.ok()) { + GTEST_SKIP() << "Cannot create working directory: " << s; } } @@ -115,9 +134,10 @@ class ModularFileSystemTest : public ::testing::TestWithParam { std::string GetURIForPath(StringPiece path) { const std::string translated_name = tensorflow::io::JoinPath(root_dir_, path); - if (GetParam().empty()) return translated_name; - - return tensorflow::strings::StrCat(GetParam(), "://", translated_name); + // We have already checked `GetParam().empty()` in + // `ModularFileSystemTest()`. root_dir_ should contain `GetParam() + "://"` + // if it isn't empty. 
+ return translated_name; } // Converts absolute paths to paths relative to root_dir_. @@ -133,15 +153,28 @@ class ModularFileSystemTest : public ::testing::TestWithParam { rng_val_ = distribution(gen); } + static void SetCloudPath(const std::string& cloud_path) { + cloud_path_ = cloud_path; + if (cloud_path_.back() == '/') cloud_path_.pop_back(); + } + + static void SetTmpDir(const std::string& tmp_dir) { + tmp_dir_ = tmp_dir.empty() ? ::testing::TempDir() : tmp_dir; + } + protected: Env* env_; private: std::string root_dir_; static int rng_val_; + static std::string cloud_path_; + static std::string tmp_dir_; }; int ModularFileSystemTest::rng_val_; +std::string ModularFileSystemTest::cloud_path_; +std::string ModularFileSystemTest::tmp_dir_; // As some of the implementations might be missing, the tests should still pass // if the returned `Status` signals the unimplemented state. @@ -1729,6 +1762,20 @@ static bool GetURIScheme(const std::string& scheme) { return true; } +// This function is used for cloud filesystem +// `S3` and `GCS` require the `root_dir_` to have bucket name +// `HDFS` requires the `root_dir` to have namenode +// `root_dir_ = scheme + "://" cloud_path_ + root_dir_` +static bool SetCloudPath(const std::string& cloud_path_) { + ModularFileSystemTest::SetCloudPath(cloud_path_); + return true; +} + +static bool SetTmpDir(const std::string& tmp_dir_) { + ModularFileSystemTest::SetTmpDir(tmp_dir_); + return true; +} + } // namespace } // namespace tensorflow @@ -1741,7 +1788,12 @@ GTEST_API_ int main(int argc, char** argv) { tensorflow::Flag("dso", tensorflow::LoadDSO, "", "Path to shared object to load"), tensorflow::Flag("scheme", tensorflow::GetURIScheme, "", - "URI scheme to test")}; + "URI scheme to test"), + tensorflow::Flag("cloud_path", tensorflow::SetCloudPath, "", + "Path for cloud filesystem (namenode for hdfs, " + "bucketname for s3/gcs)"), + tensorflow::Flag("tmp_dir", tensorflow::SetTmpDir, "", + "Temporary directory to store test data.")}; if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) { std::cout << tensorflow::Flags::Usage(argv[0], flag_list); return -1; diff --git a/tensorflow/c/experimental/saved_model/internal/BUILD b/tensorflow/c/experimental/saved_model/internal/BUILD index 7a694f4f803..5c51e26f925 100644 --- a/tensorflow/c/experimental/saved_model/internal/BUILD +++ b/tensorflow/c/experimental/saved_model/internal/BUILD @@ -31,9 +31,6 @@ cc_library( "//tensorflow/c/experimental/saved_model/public:concrete_function.h", ], copts = tf_copts(), - # TODO(bmzhao): Remove this as we refactor C API to granular targets, - # so that we can depend on c/eager/c_api_unified_experimental.h. 
- features = ["-layering_check"], visibility = [ "//tensorflow/c/experimental/saved_model/public:__pkg__", ], @@ -41,6 +38,8 @@ cc_library( ":concrete_function_type", ":function_metadata", ":function_metadata_type", + ":tensorhandle_list", + ":tensorhandle_list_type", "//tensorflow/c:c_api_macros", "//tensorflow/c/eager:c_api", "//tensorflow/c/eager:c_api_internal", @@ -160,6 +159,38 @@ cc_library( ], ) +cc_library( + name = "tensorhandle_list", + srcs = [ + "tensorhandle_list.cc", + ], + hdrs = [ + "//tensorflow/c/experimental/saved_model/public:tensorhandle_list.h", + ], + copts = tf_copts(), + visibility = [ + "//tensorflow/c/experimental/saved_model/public:__pkg__", + ], + deps = [ + ":tensorhandle_list_type", + "//tensorflow/c:c_api_macros", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:tensor_handle_interface", + "//tensorflow/c/eager:tfe_tensorhandle_internal", + ], +) + +cc_library( + name = "tensorhandle_list_type", + hdrs = [ + "tensorhandle_list_type.h", + ], + deps = [ + "//tensorflow/c:conversion_macros", + "//tensorflow/c/eager:tensor_handle_interface", + ], +) + tf_cc_test( name = "saved_model_api_test", size = "small", diff --git a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc index 4884f9e2e97..dd54416ddf9 100644 --- a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc +++ b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc @@ -15,12 +15,12 @@ limitations under the License. #include "tensorflow/c/experimental/saved_model/public/concrete_function.h" -#include "tensorflow/c/eager/c_api_unified_experimental.h" #include "tensorflow/c/eager/tfe_op_internal.h" #include "tensorflow/c/experimental/saved_model/core/concrete_function.h" #include "tensorflow/c/experimental/saved_model/core/function_metadata.h" #include "tensorflow/c/experimental/saved_model/internal/concrete_function_type.h" #include "tensorflow/c/experimental/saved_model/internal/function_metadata_type.h" +#include "tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h" extern "C" { @@ -29,10 +29,9 @@ TF_FunctionMetadata* TF_ConcreteFunctionGetMetadata(TF_ConcreteFunction* func) { &tensorflow::unwrap(func)->GetFunctionMetadata())); } -TF_OutputList* TF_ConcreteFunctionGetCaptures(TF_ConcreteFunction* func) { - // TODO(bmzhao): Refactor TF_OutputList struct definition into a separate - // internal header, and implement this function. 
- return nullptr; +const TF_TensorHandleList* TF_ConcreteFunctionGetCaptures( + TF_ConcreteFunction* func) { + return tensorflow::wrap(&tensorflow::unwrap(func)->GetCaptures()); } TFE_Op* TF_ConcreteFunctionGetCallOp(TF_ConcreteFunction* func) { diff --git a/tensorflow/c/experimental/saved_model/internal/saved_model_api.cc b/tensorflow/c/experimental/saved_model/internal/saved_model_api.cc index cce1b27d9ad..629610dbe29 100644 --- a/tensorflow/c/experimental/saved_model/internal/saved_model_api.cc +++ b/tensorflow/c/experimental/saved_model/internal/saved_model_api.cc @@ -66,7 +66,7 @@ TF_SavedModel* TF_LoadSavedModelWithTags(const char* dirname, TFE_Context* ctx, void TF_DeleteSavedModel(TF_SavedModel* model) { delete model; } TF_ConcreteFunction* TF_GetSavedModelConcreteFunction(TF_SavedModel* model, - char* function_path, + const char* function_path, TF_Status* status) { tensorflow::ConcreteFunction* result = nullptr; tensorflow::Status get_function_status = @@ -79,7 +79,7 @@ TF_ConcreteFunction* TF_GetSavedModelConcreteFunction(TF_SavedModel* model, } TF_CAPI_EXPORT extern TF_ConcreteFunction* TF_GetSavedModelSignatureDefFunction( - TF_SavedModel* model, char* signature_def_key, TF_Status* status) { + TF_SavedModel* model, const char* signature_def_key, TF_Status* status) { tensorflow::ConcreteFunction* result = nullptr; tensorflow::Status get_function_status = model->saved_model->GetSignatureDefFunction(signature_def_key, &result); diff --git a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc new file mode 100644 index 00000000000..7d018658101 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" + +#include + +#include "tensorflow/c/eager/tensor_handle_interface.h" +#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" +#include "tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h" + +extern "C" { + +size_t TF_TensorHandleListSize(const TF_TensorHandleList* list) { + return tensorflow::unwrap(list)->size(); +} + +TFE_TensorHandle* TF_TensorHandleListGet(const TF_TensorHandleList* list, + int i) { + return tensorflow::wrap((*tensorflow::unwrap(list))[i]); +} + + +} // end extern "C" diff --git a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h new file mode 100644 index 00000000000..8cbec2806a8 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h @@ -0,0 +1,37 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ +#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ + +#include + +#include "tensorflow/c/conversion_macros.h" +#include "tensorflow/c/eager/tensor_handle_interface.h" + +// Internal structures used by the SavedModel C API. These are likely to +// change and should not be depended on. + +typedef struct TF_TensorHandleList TF_TensorHandleList; + +namespace tensorflow { + +DEFINE_CONVERSION_FUNCTIONS( + std::vector, + TF_TensorHandleList) + +} // namespace tensorflow + +#endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ diff --git a/tensorflow/c/experimental/saved_model/public/BUILD b/tensorflow/c/experimental/saved_model/public/BUILD index af65e05e7f6..0cfa0a2c005 100644 --- a/tensorflow/c/experimental/saved_model/public/BUILD +++ b/tensorflow/c/experimental/saved_model/public/BUILD @@ -24,6 +24,7 @@ exports_files( "concrete_function_list.h", "function_metadata.h", "saved_model_api.h", + "tensorhandle_list.h", ], visibility = ["//tensorflow/c/experimental/saved_model/internal:__pkg__"], ) @@ -39,6 +40,7 @@ cc_library( ":concrete_function_list", ":function_metadata", ":saved_model_api", + ":tensorhandle_list", ], ) @@ -61,3 +63,8 @@ alias( name = "saved_model_api", actual = "//tensorflow/c/experimental/saved_model/internal:saved_model_api", ) + +alias( + name = "tensorhandle_list", + actual = "//tensorflow/c/experimental/saved_model/internal:tensorhandle_list", +) diff --git a/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h b/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h index 30f533f140a..aae95a5477c 100644 --- a/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h +++ b/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/c/experimental/saved_model/public/concrete_function_list.h" #include "tensorflow/c/experimental/saved_model/public/function_metadata.h" #include "tensorflow/c/experimental/saved_model/public/saved_model_api.h" +#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" // IWYU pragma: end_exports #endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_C_SAVED_MODEL_API_H_ diff --git a/tensorflow/c/experimental/saved_model/public/concrete_function.h b/tensorflow/c/experimental/saved_model/public/concrete_function.h index 351d8daed8e..2a87214270c 100644 --- a/tensorflow/c/experimental/saved_model/public/concrete_function.h +++ b/tensorflow/c/experimental/saved_model/public/concrete_function.h @@ -17,9 +17,9 @@ limitations under the License. 
#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_CONCRETE_FUNCTION_H_ #include "tensorflow/c/c_api_macros.h" -#include "tensorflow/c/eager/c_api_internal.h" -#include "tensorflow/c/eager/c_api_unified_experimental.h" +#include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/experimental/saved_model/public/function_metadata.h" +#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" #ifdef __cplusplus extern "C" { @@ -36,7 +36,7 @@ TF_CAPI_EXPORT extern TF_FunctionMetadata* TF_ConcreteFunctionGetMetadata( TF_ConcreteFunction* func); // Returns a list of TensorHandles implicitly captured by this function. -TF_CAPI_EXPORT extern TF_OutputList* TF_ConcreteFunctionGetCaptures( +TF_CAPI_EXPORT extern const TF_TensorHandleList* TF_ConcreteFunctionGetCaptures( TF_ConcreteFunction* func); // Returns a TFE_Op suitable for executing this function. diff --git a/tensorflow/c/experimental/saved_model/public/concrete_function_list.h b/tensorflow/c/experimental/saved_model/public/concrete_function_list.h index 7add847259c..e35546751f1 100644 --- a/tensorflow/c/experimental/saved_model/public/concrete_function_list.h +++ b/tensorflow/c/experimental/saved_model/public/concrete_function_list.h @@ -21,19 +21,27 @@ limitations under the License. #include "tensorflow/c/c_api_macros.h" #include "tensorflow/c/experimental/saved_model/public/concrete_function.h" +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + // An opaque type that is acts like a list of TF_ConcreteFunction pointers. typedef struct TF_ConcreteFunctionList TF_ConcreteFunctionList; // Returns the size of `list`. -TF_CAPI_EXPORT size_t -TF_ConcreteFunctionListSize(TF_ConcreteFunctionList* list); +TF_CAPI_EXPORT extern size_t TF_ConcreteFunctionListSize( + TF_ConcreteFunctionList* list); // Returns the `i`th TF_ConcreteFunction in the list. -TF_CAPI_EXPORT TF_ConcreteFunction* TF_ConcreteFunctionListGet( +TF_CAPI_EXPORT extern TF_ConcreteFunction* TF_ConcreteFunctionListGet( TF_ConcreteFunctionList* list, int i); // Deletes `list`. -TF_CAPI_EXPORT void TF_DeleteConcreteFunctionList( +TF_CAPI_EXPORT extern void TF_DeleteConcreteFunctionList( TF_ConcreteFunctionList* list); +#ifdef __cplusplus +} // end extern "C" +#endif // __cplusplus + #endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_CONCRETE_FUNCTION_LIST_H_ diff --git a/tensorflow/c/experimental/saved_model/public/saved_model_api.h b/tensorflow/c/experimental/saved_model/public/saved_model_api.h index ad381937e3c..875167bec63 100644 --- a/tensorflow/c/experimental/saved_model/public/saved_model_api.h +++ b/tensorflow/c/experimental/saved_model/public/saved_model_api.h @@ -80,7 +80,7 @@ TF_CAPI_EXPORT extern void TF_DeleteSavedModel(TF_SavedModel* model); // "conceptually" bound to `model`. Once `model` is deleted, all // `TF_ConcreteFunctions` retrieved from it are invalid, and have been deleted. TF_CAPI_EXPORT extern TF_ConcreteFunction* TF_GetSavedModelConcreteFunction( - TF_SavedModel* model, char* function_path, TF_Status* status); + TF_SavedModel* model, const char* function_path, TF_Status* status); // Retrieve a function from the TF SavedModel via a SignatureDef key. // @@ -94,7 +94,7 @@ TF_CAPI_EXPORT extern TF_ConcreteFunction* TF_GetSavedModelConcreteFunction( // TF_ConcreteFunction instance. Once `model` is deleted, all // `TF_ConcreteFunctions` retrieved from it are invalid, and have been deleted. 
TF_CAPI_EXPORT extern TF_ConcreteFunction* TF_GetSavedModelSignatureDefFunction( - TF_SavedModel* model, char* signature_def_key, TF_Status* status); + TF_SavedModel* model, const char* signature_def_key, TF_Status* status); // Returns a list of all ConcreteFunctions stored in this SavedModel. // The lifetime of the returned list is bound to `model`. diff --git a/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h b/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h new file mode 100644 index 00000000000..a1e88db3474 --- /dev/null +++ b/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h @@ -0,0 +1,43 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ +#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ + +#include + +#include "tensorflow/c/c_api_macros.h" +#include "tensorflow/c/eager/c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// An opaque type that is acts like a list of TF_ConcreteFunction pointers. +typedef struct TF_TensorHandleList TF_TensorHandleList; + +// Returns the size of `list`. +TF_CAPI_EXPORT extern size_t TF_TensorHandleListSize( + const TF_TensorHandleList* list); + +// Returns the `i`th TFE_TensorHandle in the list. 
+TF_CAPI_EXPORT extern TFE_TensorHandle* TF_TensorHandleListGet( + const TF_TensorHandleList* list, int i); + +#ifdef __cplusplus +} // end extern "C" +#endif // __cplusplus + +#endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index e8cb40f153b..e1fad8e697a 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -178,7 +178,7 @@ cc_library_with_android_deps( name = "ops", srcs = ["framework/ops.cc"], hdrs = ["framework/ops.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], deps = [ "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -197,7 +197,7 @@ cc_library_with_android_deps( "framework/scope_internal.h", ], hdrs = ["framework/scope.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], common_deps = [ ":ops", ], @@ -237,7 +237,7 @@ cc_library_with_android_deps( name = "client_session", srcs = ["client/client_session.cc"], hdrs = ["client/client_session.h"], - android_deps = ["//tensorflow/core:android_tensorflow_lib"], + android_deps = ["//tensorflow/core:portable_tensorflow_lib"], common_deps = [ ":ops", ":scope", @@ -275,7 +275,7 @@ cc_library_with_android_deps( srcs = ["ops/const_op.cc"], hdrs = ["ops/const_op.h"], android_deps = [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], common_deps = [ ":ops", @@ -304,7 +304,7 @@ cc_library_with_android_deps( srcs = ["ops/while_loop.cc"], hdrs = ["ops/while_loop.h"], android_deps = [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], common_deps = [ ":cc_ops", diff --git a/tensorflow/cc/experimental/base/public/BUILD b/tensorflow/cc/experimental/base/public/BUILD new file mode 100644 index 00000000000..045d4e6cd97 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/BUILD @@ -0,0 +1,78 @@ +# Experimental C++ APIs for TensorFlow. +# New TF C++ APIs under the tensorflow::cc namespace aim to guarantee ABI stability. +# Users are expected to compile against public c++ headers, and link against +# libtensorflow (https://www.tensorflow.org/install/lang_c). +# We aim to achieve ABI stability in new C++ APIs by only using types +# on the API surface that: +# 1. Have a header-only implementation +# 2. Are std:: types +# 3. 
Wrap an opaque C type + +package( + # This is intentionally public + default_visibility = [ + "//visibility:public", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "runtime", + hdrs = [ + "runtime.h", + ], + deps = [ + ":status", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_experimental", + ], +) + +cc_library( + name = "runtime_builder", + hdrs = [ + "runtime_builder.h", + ], + deps = [ + ":runtime", + ":status", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_experimental", + ], +) + +cc_library( + name = "status", + hdrs = [ + "status.h", + ], + deps = [ + "//tensorflow/c:tf_status", + ], +) + +cc_library( + name = "tensor", + hdrs = [ + "tensor.h", + ], + deps = [ + ":status", + "//tensorflow/c:tf_datatype", + "//tensorflow/c:tf_tensor", + ], +) + +cc_library( + name = "tensorhandle", + hdrs = [ + "tensorhandle.h", + ], + deps = [ + ":runtime", + ":status", + ":tensor", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/eager:c_api_experimental", + ], +) diff --git a/tensorflow/cc/experimental/base/public/runtime.h b/tensorflow/cc/experimental/base/public/runtime.h new file mode 100644 index 00000000000..711a38c233a --- /dev/null +++ b/tensorflow/cc/experimental/base/public/runtime.h @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_H_ + +#include + +#include "tensorflow/c/eager/c_api_experimental.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// Runtime represents an opaque instance of a Tensorflow runtime, with its own +// resources, threadpools, etc. Clients are expected to construct a Runtime +// object through tensorflow::cc::RuntimeBuilder::Build, after setting any +// relevant configuration options. Many Tensorflow functions take a reference to +// the runtime as an argument (eg: tensorflow::cc::SavedModelAPI::Load), and +// may have different implementations depending on the runtime. For many of +// these Runtime-attached objects (such as tensorflow::cc::TensorHandle), the +// Runtime must outlive these objects. +class Runtime { + public: + // Runtime is movable, but not copyable. + Runtime(Runtime&&) = default; + Runtime& operator=(Runtime&&) = default; + + private: + friend class RuntimeBuilder; + friend class SavedModelAPI; + friend class TensorHandle; + + // Wraps a TFE_Context. Takes ownership of ctx. + explicit Runtime(TFE_Context* ctx) : ctx_(ctx) {} + + // Deletes the currently wrapped TFE_Context, swaps it with ctx, + // and takes ownership of ctx. + void Reset(TFE_Context* ctx) { ctx_.reset(ctx); } + + // Returns the TFE_Context that this object wraps. This object + // retains ownership of the pointer. 
+ TFE_Context* GetTFEContext() const { return ctx_.get(); } + + // Runtime is not copyable + Runtime(const Runtime&) = delete; + Runtime& operator=(const Runtime&) = delete; + + struct TFEContextDeleter { + void operator()(TFE_Context* p) const { TFE_DeleteContext(p); } + }; + std::unique_ptr ctx_; +}; + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_H_ diff --git a/tensorflow/cc/experimental/base/public/runtime_builder.h b/tensorflow/cc/experimental/base/public/runtime_builder.h new file mode 100644 index 00000000000..737e06cb2c6 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/runtime_builder.h @@ -0,0 +1,86 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_BUILDER_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_BUILDER_H_ + +#include + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/status.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// RuntimeBuilder is a builder used to construct a tensorflow::cc::Runtime. +// Use this to set configuration options, like threadpool size, etc. +class RuntimeBuilder { + public: + RuntimeBuilder() : options_(TFE_NewContextOptions()) {} + + // If `use_tfrt` is true, we will use the new Tensorflow Runtime + // (https://blog.tensorflow.org/2020/04/tfrt-new-tensorflow-runtime.html) as + // our runtime implementation. + RuntimeBuilder& SetUseTFRT(bool use_tfrt); + + // Build a Tensorflow Runtime. + // + // Params: + // status - Set to OK on success and an appropriate error on failure. + // Returns: + // If status is not OK, returns nullptr. Otherwise, returns a + // unique_ptr. + std::unique_ptr Build(Status* status); + + // RuntimeBuilder is movable, but not copyable. 
+ RuntimeBuilder(RuntimeBuilder&&) = default; + RuntimeBuilder& operator=(RuntimeBuilder&&) = default; + + private: + // RuntimeBuilder is not copyable + RuntimeBuilder(const RuntimeBuilder&) = delete; + RuntimeBuilder& operator=(const RuntimeBuilder&) = delete; + + struct TFEContextOptionsDeleter { + void operator()(TFE_ContextOptions* p) const { + TFE_DeleteContextOptions(p); + } + }; + std::unique_ptr options_; +}; + +inline RuntimeBuilder& RuntimeBuilder::SetUseTFRT(bool use_tfrt) { + TFE_ContextOptionsSetTfrt(options_.get(), use_tfrt); + return *this; +} + +inline std::unique_ptr RuntimeBuilder::Build(Status* status) { + TFE_Context* result = TFE_NewContext(options_.get(), status->GetTFStatus()); + if (!status->ok()) { + return nullptr; + } + // We can't use std::make_unique here because of its interaction with a + // private constructor: https://abseil.io/tips/134 + return std::unique_ptr(new Runtime(result)); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_RUNTIME_BUILDER_H_ diff --git a/tensorflow/cc/experimental/base/public/status.h b/tensorflow/cc/experimental/base/public/status.h new file mode 100644 index 00000000000..98c8cf6ced2 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/status.h @@ -0,0 +1,96 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_STATUS_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_STATUS_H_ + +#include +#include + +#include "tensorflow/c/tf_status.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// Status is a wrapper around an error code and an optional error message. +// The set of error codes are defined here: +// https://github.com/tensorflow/tensorflow/blob/08931c1e3e9eb2e26230502d678408e66730826c/tensorflow/c/tf_status.h#L39-L60 +// Many Tensorflow APIs return a Status, or take a Status as an out parameter. +// Clients should check for status.ok() after calling these APIs, and either +// handle or propagate the error appropriately. +// TODO(bmzhao): Add a detailed code example before moving out of experimental. +class Status { + public: + // Create a success status + Status() : status_(TF_NewStatus()) {} + + // Return the status code + TF_Code code() const; + + // Returns the error message in Status. + std::string message() const; + + // Returns the error message in Status. + bool ok() const; + + // Record in Status. Any previous information is lost. + // A common use is to clear a status: SetStatus(TF_OK, ""); + void SetStatus(TF_Code code, const std::string& msg); + + // Status is movable, but not copyable. + Status(Status&&) = default; + Status& operator=(Status&&) = default; + + private: + friend class RuntimeBuilder; + friend class Runtime; + friend class SavedModelAPI; + friend class TensorHandle; + + // Wraps a TF_Status*, and takes ownership of it. 
+ explicit Status(TF_Status* status) : status_(status) {} + + // Status is not copyable + Status(const Status&) = delete; + Status& operator=(const Status&) = delete; + + // Returns the TF_Status that this object wraps. This object + // retains ownership of the pointer. + TF_Status* GetTFStatus() const { return status_.get(); } + + struct TFStatusDeleter { + void operator()(TF_Status* p) const { TF_DeleteStatus(p); } + }; + std::unique_ptr status_; +}; + +inline TF_Code Status::code() const { return TF_GetCode(status_.get()); } + +inline std::string Status::message() const { + return std::string(TF_Message(status_.get())); +} + +inline bool Status::ok() const { return code() == TF_OK; } + +inline void Status::SetStatus(TF_Code code, const std::string& msg) { + TF_SetStatus(status_.get(), code, msg.c_str()); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_STATUS_H_ diff --git a/tensorflow/cc/experimental/base/public/tensor.h b/tensorflow/cc/experimental/base/public/tensor.h new file mode 100644 index 00000000000..fc447262ce1 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/tensor.h @@ -0,0 +1,175 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSOR_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSOR_H_ + +#include +#include + +#include +#include +#include + +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/c/tf_tensor.h" +#include "tensorflow/cc/experimental/base/public/status.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// Tensor represents an n-dimensional array of values. +class Tensor { + public: + using DeleterCallback = std::function; + + // Constructs a Tensor from user provided buffer. + // + // Params: + // dtype - The dtype of the tensor's data. + // shape - A shape vector, where each element corresponds to the size of + // the tensor's corresponding dimension. + // data - Pointer to a buffer of memory to construct a Tensor out of. + // len - The length (in bytes) of `data` + // deleter - A std::function to be called when the Tensor no longer needs the + // memory in `data`. This can be used to free `data`, or + // perhaps decrement a refcount associated with `data`, etc. + // status - Set to OK on success and an error on failure. + // Returns: + // If an error occurred, status->ok() will be false, and the returned + // Tensor must not be used. + // TODO(bmzhao): Add Runtime as an argument to this function so we can swap to + // a TFRT backed tensor. + // TODO(bmzhao): Add benchmarks on overhead for this function; we can + // consider using int64_t* + length rather than vector. 
+ static Tensor FromBuffer(TF_DataType dtype, const std::vector<int64_t>& shape, + void* data, size_t len, DeleterCallback deleter, + Status* status); + + // TODO(bmzhao): In the case we construct a tensor from non-owned memory, + // we should offer a way to deep copy the tensor into a new tensor, which + // owns the underlying memory. This could be a .deepcopy()/clone() method. + + // TODO(bmzhao): In the future, we want to relax the non-copyability + // constraint. To do so, we can add a C API function that acts like + // CopyFrom: + // https://github.com/tensorflow/tensorflow/blob/08931c1e3e9eb2e26230502d678408e66730826c/tensorflow/core/framework/tensor.h#L301-L311 + + // Tensor is movable, but not copyable + Tensor(Tensor&&) = default; + Tensor& operator=(Tensor&&) = default; + + // Returns the number of dimensions in the tensor. Can be -1, which represents + // unknown rank. + int dims() const; + + // Returns the number of elements in dimension `d`. + // REQUIRES: `0 <= d < dims()` + int64_t dim_size(int d) const; + + // Returns a pointer to the underlying data buffer. + void* data() const; + + // Returns the data type of the tensor. + TF_DataType dtype() const; + + // Returns the number of elements in the tensor. For a tensor with a partially + // defined shape, -1 means not fully defined. + int64_t num_elements() const; + + // Returns the size of the underlying data in bytes. + size_t num_bytes() const; + + private: + friend class TensorHandle; + friend class Runtime; + + // Wraps a TF_Tensor. Takes ownership of tensor. + explicit Tensor(TF_Tensor* tensor) : tensor_(tensor) {} + + // Tensor is not copyable + Tensor(const Tensor&) = delete; + Tensor& operator=(const Tensor&) = delete; + + // Returns the underlying TF_Tensor that this object wraps. + // This object retains ownership of the pointer. + TF_Tensor* GetTFTensor() const { return tensor_.get(); } + + struct DeleterStruct { + std::function<void(void*, size_t)> deleter; + }; + + static void DeleterFunction(void* memory, size_t len, void* deleter_struct) { + DeleterStruct* deleter = reinterpret_cast<DeleterStruct*>(deleter_struct); + deleter->deleter(memory, len); + delete deleter; + } + + struct TFTensorDeleter { + void operator()(TF_Tensor* p) const { TF_DeleteTensor(p); } + }; + std::unique_ptr<TF_Tensor, TFTensorDeleter> tensor_; +}; + +inline void* Tensor::data() const { return TF_TensorData(tensor_.get()); } + +inline int Tensor::dims() const { return TF_NumDims(tensor_.get()); } + +inline int64_t Tensor::dim_size(int d) const { + return TF_Dim(tensor_.get(), d); +} + +inline TF_DataType Tensor::dtype() const { + return TF_TensorType(tensor_.get()); +} + +inline int64_t Tensor::num_elements() const { + return TF_TensorElementCount(tensor_.get()); +} + +inline size_t Tensor::num_bytes() const { + return TF_TensorByteSize(tensor_.get()); +} + +inline Tensor Tensor::FromBuffer(TF_DataType dtype, + const std::vector<int64_t>& shape, void* data, + size_t len, DeleterCallback deleter, + Status* status) { + // Credit to apassos@ for this technique: + // Despite the fact that our API takes a std::function deleter, we are able + // to maintain ABI stability because: + // 1. Only a function pointer is sent across the C API (&DeleterFunction) + // 2. DeleterFunction is defined in the same build artifact that constructed + // the std::function (so there isn't confusion about std::function ABI). + // Note that 2. is satisfied by the fact that this is a header-only API, where + // the function implementations are inline.
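The ABI argument above is the usual trampoline pattern: box the std::function on the heap, pass only a plain function pointer plus a void* context across the C boundary, and delete the box once the callback has fired. A stripped-down sketch of the same idea, independent of the TF_Tensor API (the C-style callback type here is hypothetical, not a real TensorFlow symbol):

    #include <cstddef>
    #include <functional>

    // Hypothetical C-style callback: the C side sees only a function pointer
    // and an opaque context argument.
    using CDeallocator = void (*)(void* data, size_t len, void* arg);

    struct DeleterBox {
      std::function<void(void*, size_t)> fn;
    };

    // Trampoline with a C-compatible signature. It lives in the same build
    // artifact that created the std::function, so std::function's ABI never
    // crosses the C boundary.
    void Trampoline(void* data, size_t len, void* arg) {
      DeleterBox* box = static_cast<DeleterBox*>(arg);
      box->fn(data, len);
      delete box;  // One-shot: the box frees itself after the callback runs.
    }

    // The implementation that follows does the equivalent of:
    //   c_api_new_tensor(..., &Trampoline, new DeleterBox{std::move(deleter)});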
+ + DeleterStruct* deleter_struct = new DeleterStruct{deleter}; + TF_Tensor* tensor = TF_NewTensor(dtype, shape.data(), shape.size(), data, len, + &DeleterFunction, deleter_struct); + if (tensor == nullptr) { + status->SetStatus(TF_INVALID_ARGUMENT, + "Failed to create tensor for input buffer"); + return Tensor(nullptr); + } + return Tensor(tensor); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSOR_H_ diff --git a/tensorflow/cc/experimental/base/public/tensorhandle.h b/tensorflow/cc/experimental/base/public/tensorhandle.h new file mode 100644 index 00000000000..99453ee7ea8 --- /dev/null +++ b/tensorflow/cc/experimental/base/public/tensorhandle.h @@ -0,0 +1,98 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ + +#include <memory> +#include <vector> + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/status.h" +#include "tensorflow/cc/experimental/base/public/tensor.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// An opaque representation of a tensor computed/managed by the TensorFlow +// runtime (tensorflow::cc::Runtime). Unlike a Tensor, a TensorHandle may refer +// to tensors placed in memory of different devices or remote address spaces. +// Note that tensorflow::cc::Runtime MUST outlive all TensorHandles created +// from it. +class TensorHandle { + public: + // Unwraps a Tensor from the given TensorHandle. If an error occurred, + // status->ok() will be false, and the returned Tensor must not be used. + Tensor Resolve(Status* status); + + // Constructs a TensorHandle from a Tensor. If an error occurred, + // status->ok() will be false, and the returned TensorHandle must not be used. + static TensorHandle FromTensor(const Tensor& tensor, const Runtime& runtime, + Status* status); + + // TensorHandle is movable, but not copyable + TensorHandle(TensorHandle&&) = default; + TensorHandle& operator=(TensorHandle&&) = default; + + private: + // Wraps a TFE_TensorHandle. Takes ownership of handle. + explicit TensorHandle(TFE_TensorHandle* handle) : handle_(handle) {} + + // TensorHandle is not copyable + TensorHandle(const TensorHandle&) = delete; + TensorHandle& operator=(const TensorHandle&) = delete; + + // Returns the underlying TFE_TensorHandle that this object wraps. + // This object retains ownership of the pointer. + TFE_TensorHandle* GetTFETensorHandle() const { return handle_.get(); } + + // Deletes the currently wrapped TFE_TensorHandle, swaps in `handle`, + // and takes ownership of it.
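Putting the two classes together, a round trip through the runtime looks roughly as follows; this is a sketch assuming a Runtime built as shown earlier, and the Runtime must outlive the handle, per the class comment above:

    #include "tensorflow/cc/experimental/base/public/runtime.h"
    #include "tensorflow/cc/experimental/base/public/status.h"
    #include "tensorflow/cc/experimental/base/public/tensor.h"
    #include "tensorflow/cc/experimental/base/public/tensorhandle.h"

    using tensorflow::experimental::cc::Runtime;
    using tensorflow::experimental::cc::Status;
    using tensorflow::experimental::cc::Tensor;
    using tensorflow::experimental::cc::TensorHandle;

    // Hands `tensor` to the runtime, then materializes it back into host memory.
    bool RoundTrips(const Tensor& tensor, const Runtime& runtime) {
      Status status;
      TensorHandle handle = TensorHandle::FromTensor(tensor, runtime, &status);
      if (!status.ok()) return false;
      Tensor resolved = handle.Resolve(&status);
      return status.ok() && resolved.num_elements() == tensor.num_elements();
    }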
+ void Reset(TFE_TensorHandle* handle) { handle_.reset(handle); } + + struct TFETensorHandleDeleter { + void operator()(TFE_TensorHandle* p) const { TFE_DeleteTensorHandle(p); } + }; + std::unique_ptr handle_; +}; + +inline Tensor TensorHandle::Resolve(Status* status) { + TF_Tensor* tensor = + TFE_TensorHandleResolve(handle_.get(), status->GetTFStatus()); + if (!status->ok()) { + return Tensor(nullptr); + } + return Tensor(tensor); +} + +inline TensorHandle TensorHandle::FromTensor(const Tensor& tensor, + const Runtime& runtime, + Status* status) { + TFE_TensorHandle* tensor_handle = TFE_NewTensorHandleFromTensor( + runtime.GetTFEContext(), tensor.GetTFTensor(), status->GetTFStatus()); + if (!status->ok()) { + return TensorHandle(nullptr); + } + return TensorHandle(tensor_handle); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_PUBLIC_TENSORHANDLE_H_ diff --git a/tensorflow/cc/experimental/base/tests/BUILD b/tensorflow/cc/experimental/base/tests/BUILD new file mode 100644 index 00000000000..f449d618f72 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/BUILD @@ -0,0 +1,50 @@ +# Tests for the C++ header-only base types. +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +package( + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tensor_types_test_util", + testonly = True, + hdrs = ["tensor_types_test_util.h"], + deps = [ + "//tensorflow/c:tf_datatype", + ], +) + +tf_cc_test( + name = "tensor_test", + srcs = [ + "tensor_test.cc", + ], + deps = [ + ":tensor_types_test_util", + "//tensorflow/c:tf_datatype", + "//tensorflow/cc/experimental/base/public:status", + "//tensorflow/cc/experimental/base/public:tensor", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "tensorhandle_test", + srcs = [ + "tensorhandle_test.cc", + ], + deps = [ + ":tensor_types_test_util", + "//tensorflow/c:tf_datatype", + "//tensorflow/cc/experimental/base/public:runtime", + "//tensorflow/cc/experimental/base/public:runtime_builder", + "//tensorflow/cc/experimental/base/public:status", + "//tensorflow/cc/experimental/base/public:tensor", + "//tensorflow/cc/experimental/base/public:tensorhandle", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/cc/experimental/base/tests/tensor_test.cc b/tensorflow/cc/experimental/base/tests/tensor_test.cc new file mode 100644 index 00000000000..33f9ab637e8 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensor_test.cc @@ -0,0 +1,163 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/cc/experimental/base/public/tensor.h" + +#include +#include + +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/cc/experimental/base/tests/tensor_types_test_util.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace { + +using tensorflow::experimental::cc::Status; +using tensorflow::experimental::cc::Tensor; + +using SimpleTypes = ::testing::Types< + tensorflow::FloatType, tensorflow::DoubleType, tensorflow::Int32Type, + tensorflow::UINT8Type, tensorflow::INT8Type, tensorflow::INT64Type, + tensorflow::UINT16Type, tensorflow::UINT32Type, tensorflow::UINT64Type>; + +template +class ConstructScalarTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(ConstructScalarTensorTest, SimpleTypes); + +// This test constructs a scalar tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(ConstructScalarTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + typename TypeParam::type value = 42; + Tensor tensor = Tensor::FromBuffer(/*dtype=*/dtype, /*shape=*/{}, + /*data=*/&value, + /*len=*/sizeof(value), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 0); + EXPECT_EQ(tensor.dtype(), dtype); + EXPECT_EQ(*reinterpret_cast(tensor.data()), 42); + EXPECT_EQ(tensor.num_bytes(), sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), 1); +} + +template +class Construct1DTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct1DTensorTest, SimpleTypes); + +// This test constructs a 1D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct1DTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 1 vector. + std::vector shape; + shape.push_back(value.size()); + + Tensor tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 1); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +template +class Construct2DTensorTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct2DTensorTest, SimpleTypes); + +// This test constructs a 2D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct2DTensorTest, ValidTensorAttributesAfterConstruction) { + Status status; + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. 
+ std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 2 vector with shape 2 x 3. + std::vector shape({2, 3}); + + Tensor tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 2); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +TEST(CPPTensorAPI, ConstructTensorFromBuffer) { + bool done = false; + Status status; + std::vector data_vector({12, 14, 20, 18, 39, 42, 100}); + { + // data_vector is a rank 1 tensor. + std::vector shape; + shape.push_back(data_vector.size()); + + Tensor::DeleterCallback callback = [&done](void* data, size_t len) { + done = true; + }; + + Tensor tensor = + Tensor::FromBuffer(/*dtype=*/TF_INT32, /*shape=*/shape, + /*data=*/data_vector.data(), + /*len=*/data_vector.size() * sizeof(int32_t), + /*deleter=*/callback, &status); + ASSERT_TRUE(status.ok()) << status.message(); + } + // At this point, tensor has been destroyed, and the deleter callback should + // have run. + EXPECT_TRUE(done); +} + +} // namespace diff --git a/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h b/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h new file mode 100644 index 00000000000..af9cad7529b --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensor_types_test_util.h @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ +#define TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ + +#include + +#include "tensorflow/c/tf_datatype.h" + +namespace tensorflow { + +// Each of the following struct types have two members: a kDType that +// corresponds to a TF_Datatype enum value, and a typedef "type" +// of its corresponding C++ type. 
These types allow us to write Dtype-agnostic +// tests via GoogleTest's TypedTests: +// https://github.com/google/googletest/blob/e589a337170554c48bc658cc857cf15080c9eacc/googletest/docs/advanced.md#typed-tests +struct FloatType { + using type = float; + static constexpr TF_DataType kDType = TF_FLOAT; +}; + +struct DoubleType { + using type = double; + static constexpr TF_DataType kDType = TF_DOUBLE; +}; + +struct Int32Type { + using type = int32_t; + static constexpr TF_DataType kDType = TF_INT32; +}; + +struct UINT8Type { + using type = uint8_t; + static constexpr TF_DataType kDType = TF_UINT8; +}; + +struct INT8Type { + using type = int8_t; + static constexpr TF_DataType kDType = TF_INT8; +}; + +struct INT64Type { + using type = int64_t; + static constexpr TF_DataType kDType = TF_INT64; +}; + +struct UINT16Type { + using type = uint16_t; + static constexpr TF_DataType kDType = TF_UINT16; +}; + +struct UINT32Type { + using type = uint32_t; + static constexpr TF_DataType kDType = TF_UINT32; +}; + +struct UINT64Type { + using type = uint64_t; + static constexpr TF_DataType kDType = TF_UINT64; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CC_EXPERIMENTAL_BASE_TEST_TENSOR_TYPES_TEST_UTIL_H_ diff --git a/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc b/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc new file mode 100644 index 00000000000..cfeaba4e392 --- /dev/null +++ b/tensorflow/cc/experimental/base/tests/tensorhandle_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/experimental/base/public/tensorhandle.h" + +#include +#include + +#include + +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/runtime_builder.h" +#include "tensorflow/cc/experimental/base/public/tensor.h" +#include "tensorflow/cc/experimental/base/tests/tensor_types_test_util.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +using tensorflow::experimental::cc::Runtime; +using tensorflow::experimental::cc::RuntimeBuilder; +using tensorflow::experimental::cc::Status; +using tensorflow::experimental::cc::Tensor; +using tensorflow::experimental::cc::TensorHandle; + +using SimpleTypes = ::testing::Types< + tensorflow::FloatType, tensorflow::DoubleType, tensorflow::Int32Type, + tensorflow::UINT8Type, tensorflow::INT8Type, tensorflow::INT64Type, + tensorflow::UINT16Type, tensorflow::UINT32Type, tensorflow::UINT64Type>; + +template +class ConstructScalarTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(ConstructScalarTensorHandleTest, SimpleTypes); + +// This test constructs a scalar tensor for each of the types in "SimpleTypes", +// then wraps it in a TensorHandle. 
We then unwrap it back into a Tensor, and +// verify the expected dims, dtype, value, num bytes, and num elements. +TYPED_TEST(ConstructScalarTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + typename TypeParam::type value = 42; + Tensor original_tensor = + Tensor::FromBuffer(/*dtype=*/dtype, /*shape=*/{}, + /*data=*/&value, + /*len=*/sizeof(value), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 0); + EXPECT_EQ(tensor.dtype(), dtype); + EXPECT_EQ(*reinterpret_cast(tensor.data()), 42); + EXPECT_EQ(tensor.num_bytes(), sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), 1); +} + +template +class Construct1DTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct1DTensorHandleTest, SimpleTypes); + +// This test constructs a 1D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct1DTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. + std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 1 vector. + std::vector shape; + shape.push_back(value.size()); + + Tensor original_tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 1); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +template +class Construct2DTensorHandleTest : public ::testing::Test {}; +TYPED_TEST_SUITE(Construct2DTensorHandleTest, SimpleTypes); + +// This test constructs a 2D tensor for each of the types in "SimpleTypes", +// and verifies the expected dimensions, dtype, value, number of bytes, and +// number of elements. +TYPED_TEST(Construct2DTensorHandleTest, + ValidTensorAttributesAfterConstruction) { + Status status; + RuntimeBuilder runtime_builder; + std::unique_ptr runtime = runtime_builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + TF_DataType dtype = TypeParam::kDType; + // This is our 1D tensor of varying dtype. 
+ std::vector value = {42, 100, 0, 1, 4, 29}; + // Shape is Rank 2 vector with shape 2 x 3. + std::vector shape({2, 3}); + + Tensor original_tensor = Tensor::FromBuffer( + /*dtype=*/dtype, /*shape=*/shape, + /*data=*/value.data(), + /*len=*/value.size() * sizeof(typename TypeParam::type), + /*deleter=*/[](void*, size_t) {}, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + TensorHandle handle = + TensorHandle::FromTensor(original_tensor, *runtime, &status); + ASSERT_TRUE(status.ok()) << status.message(); + + Tensor tensor = handle.Resolve(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + EXPECT_EQ(tensor.dims(), 2); + EXPECT_EQ(tensor.dtype(), dtype); + tensorflow::gtl::ArraySlice tensor_view( + reinterpret_cast(tensor.data()), value.size()); + EXPECT_EQ(tensor_view[0], 42); + EXPECT_EQ(tensor_view[1], 100); + EXPECT_EQ(tensor_view[2], 0); + EXPECT_EQ(tensor_view[3], 1); + EXPECT_EQ(tensor_view[4], 4); + EXPECT_EQ(tensor_view[5], 29); + + EXPECT_EQ(tensor.num_bytes(), + value.size() * sizeof(typename TypeParam::type)); + EXPECT_EQ(tensor.num_elements(), value.size()); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index 882b4032f76..b13d8db48a9 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -4,7 +4,6 @@ load( "//tensorflow:tensorflow.bzl", "if_android", - "if_ios", "if_mobile", "if_not_mobile", "tf_cc_test", @@ -85,7 +84,7 @@ cc_library( "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", ]) + if_android([ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ]), ) diff --git a/tensorflow/cc/saved_model/experimental/public/BUILD b/tensorflow/cc/saved_model/experimental/public/BUILD new file mode 100644 index 00000000000..3e9a671a61f --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/public/BUILD @@ -0,0 +1,58 @@ +# Experimental C++ SavedModel Header Only APIs. See RFC +# https://github.com/tensorflow/community/pull/207 + +package( + # This is intentionally public + default_visibility = [ + "//visibility:public", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "concrete_function", + hdrs = [ + "concrete_function.h", + ], + deps = [ + ":function_metadata", + "//tensorflow/c/eager:c_api", + "//tensorflow/c/experimental/saved_model/public:concrete_function", + "//tensorflow/cc/experimental/base/public:status", + ], +) + +cc_library( + name = "concrete_function_list", + hdrs = [ + "concrete_function_list.h", + ], + deps = [ + ":concrete_function", + "//tensorflow/c/experimental/saved_model/public:concrete_function_list", + ], +) + +cc_library( + name = "function_metadata", + hdrs = [ + "function_metadata.h", + ], + deps = [ + "//tensorflow/c/experimental/saved_model/public:function_metadata", + ], +) + +cc_library( + name = "saved_model_api", + hdrs = [ + "saved_model_api.h", + ], + deps = [ + ":concrete_function", + ":concrete_function_list", + "//tensorflow/c/experimental/saved_model/public:saved_model_api", + "//tensorflow/cc/experimental/base/public:runtime", + "//tensorflow/cc/experimental/base/public:status", + ], +) diff --git a/tensorflow/cc/saved_model/experimental/public/concrete_function.h b/tensorflow/cc/saved_model/experimental/public/concrete_function.h new file mode 100644 index 00000000000..1adaf70b01a --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/public/concrete_function.h @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_H_ +#define TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_H_ + +#include + +#include "tensorflow/c/eager/c_api.h" +#include "tensorflow/c/experimental/saved_model/public/concrete_function.h" +#include "tensorflow/cc/experimental/base/public/status.h" +#include "tensorflow/cc/saved_model/experimental/public/function_metadata.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// ConcreteFunction is an executable "function" loaded from a SavedModelAPI. +class ConcreteFunction final { + public: + // TODO(bmzhao): Adding ConcreteFunction::Run in subsequent CL, since + // it depends on tensorflow::cc::Tensor and tensorflow::cc::TensorHandle + + // Returns FunctionMetadata associated with this ConcreteFunction. + const FunctionMetadata* GetFunctionMetadata(); + + private: + friend class SavedModelAPI; + friend class ConcreteFunctionList; + + // TODO(bmzhao): Consider adding a macro for wrapping/unwrapping + // when moving out of experimental. + static ConcreteFunction* wrap(TF_ConcreteFunction* p) { + return reinterpret_cast(p); + } + static TF_ConcreteFunction* unwrap(ConcreteFunction* p) { + return reinterpret_cast(p); + } +}; + +inline const FunctionMetadata* ConcreteFunction::GetFunctionMetadata() { + return FunctionMetadata::wrap(TF_ConcreteFunctionGetMetadata(unwrap(this))); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h b/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h new file mode 100644 index 00000000000..88cb779ef15 --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/public/concrete_function_list.h @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
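The wrap/unwrap helpers above rely on a convention used throughout these headers: the C++ class carries no state of its own, so a pointer to the opaque C struct can simply be reinterpreted as a pointer to the C++ view type. A stripped-down sketch with hypothetical names (TF_Thing and TF_ThingId are placeholders, not real C API symbols):

    // Opaque C type, only ever handled through pointers.
    typedef struct TF_Thing TF_Thing;
    extern "C" int TF_ThingId(TF_Thing* t);  // Hypothetical C accessor.

    class Thing {
     public:
      int id() { return TF_ThingId(unwrap(this)); }

     private:
      // No data members: a Thing* is just a typed view over a TF_Thing*, so
      // the reinterpret_casts below never touch memory.
      static Thing* wrap(TF_Thing* p) { return reinterpret_cast<Thing*>(p); }
      static TF_Thing* unwrap(Thing* p) { return reinterpret_cast<TF_Thing*>(p); }
    };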
+==============================================================================*/ + +#ifndef TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_LIST_H_ +#define TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_LIST_H_ + +#include + +#include "tensorflow/c/experimental/saved_model/public/concrete_function_list.h" +#include "tensorflow/cc/saved_model/experimental/public/concrete_function.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// ConcreteFunctionList helps convert an opaque pointer to an array of +// ConcreteFunction pointers to a std::vector. +class ConcreteFunctionList { + public: + // Converts this object to a std::vector + std::vector ToVector(); + + private: + friend class SavedModelAPI; + // Wraps a TF_ConcreteFunctionList. Takes ownership of list. + explicit ConcreteFunctionList(TF_ConcreteFunctionList* list) : list_(list) {} + + struct TFConcreteFunctionListDeleter { + void operator()(TF_ConcreteFunctionList* p) const { + TF_DeleteConcreteFunctionList(p); + } + }; + std::unique_ptr list_; +}; + +inline std::vector ConcreteFunctionList::ToVector() { + int size = TF_ConcreteFunctionListSize(list_.get()); + std::vector result; + result.reserve(size); + for (int i = 0; i < size; ++i) { + result.push_back( + ConcreteFunction::wrap(TF_ConcreteFunctionListGet(list_.get(), i))); + } + return result; +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_CONCRETE_FUNCTION_LIST_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/function_metadata.h b/tensorflow/cc/saved_model/experimental/public/function_metadata.h new file mode 100644 index 00000000000..11e1a860d84 --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/public/function_metadata.h @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_FUNCTION_METADATA_H_ +#define TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_FUNCTION_METADATA_H_ + +#include + +#include "tensorflow/c/experimental/saved_model/public/function_metadata.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// FunctionMetadata stores additional function information, including +// optional signaturedef feeds/fetches (for TF1-based ConcreteFunctions), +// a valid function path (for TF2-based ConcreteFunctions), and +// the types + number of inputs and outputs. +class FunctionMetadata final { + // TODO(bmzhao): Add getters here as necessary. 
+ private: + friend class ConcreteFunction; + static FunctionMetadata* wrap(TF_FunctionMetadata* p) { + return reinterpret_cast(p); + } + static TF_FunctionMetadata* unwrap(FunctionMetadata* p) { + return reinterpret_cast(p); + } +}; + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_FUNCTION_METADATA_H_ diff --git a/tensorflow/cc/saved_model/experimental/public/saved_model_api.h b/tensorflow/cc/saved_model/experimental/public/saved_model_api.h new file mode 100644 index 00000000000..04018bf2aab --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/public/saved_model_api.h @@ -0,0 +1,162 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_SAVED_MODEL_API_H_ +#define TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_SAVED_MODEL_API_H_ + +#include +#include +#include +#include + +#include "tensorflow/c/experimental/saved_model/public/saved_model_api.h" +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/status.h" +#include "tensorflow/cc/saved_model/experimental/public/concrete_function.h" +#include "tensorflow/cc/saved_model/experimental/public/concrete_function_list.h" + +namespace tensorflow { +namespace experimental { +namespace cc { + +// SavedModelAPI offers a way to load Tensorflow Saved Models +// (https://www.tensorflow.org/guide/saved_model) and execute saved +// tf.functions or legacy SignatureDefs in a TF2-idiomatic fashion. +// See RFC 207 +// (https://github.com/tensorflow/community/blob/master/rfcs/20200218-tf-c-saved-model.md) +// TODO(bmzhao): Add an e2e example here, once ConcreteFunction::Run is added. +class SavedModelAPI { + public: + // Load a SavedModel from `dirname`. + // + // Params: + // saved_model_path - A directory filepath that the SavedModel is at. + // runtime - A runtime used to load SavedModelAPI. `runtime` must outlive the + // returned TF_SavedModel pointer. + // tags - Optional set of tags. If tags = nullptr, we expect the SavedModel + // to contain a single Metagraph (as for those exported from TF2's + // `tf.saved_model.save`). If tags != nullptr, we load the metagraph + // matching the tags: + // https://github.com/tensorflow/tensorflow/blob/428cdeda09aef81e958eeb274b83d27ad635b57b/tensorflow/core/protobuf/meta_graph.proto#L50-L56 + // status - Set to OK on success and an appropriate error on failure. + // Returns: + // If status is not OK, returns nullptr. + static std::unique_ptr Load( + const std::string& saved_model_path, const Runtime& runtime, + Status* status, const std::unordered_set* tags = nullptr); + + // Retrieve a function from the TF2 SavedModel via function path. + // + // Params: + // function_path - A string containing the path from the root saved python + // object to a tf.function method. 
+ // status - Set to OK on success and an appropriate error on failure. + // Returns: + // If status is not OK, returns nullptr. Otherwise, returns a + // tensorflow::cc::ConcreteFunction pointer. The lifetime of this pointer + // is bound to SavedModelAPI it was loaded from. + ConcreteFunction* GetConcreteFunction(const std::string& function_path, + Status* status); + + // Retrieve a function from the TF SavedModel via a SignatureDef key. + // + // Params: + // signature_def_key - String key of SignatureDef map of a SavedModel: + // https://github.com/tensorflow/tensorflow/blob/69b08900b1e991d84bce31f3b404f5ed768f339f/tensorflow/core/protobuf/meta_graph.proto#L89 + // status - Set to OK on success and an appropriate error on failure. + // Returns: + // If status is not OK, returns nullptr. Otherwise, returns a + // tensorflow::cc::ConcreteFunction pointer. The lifetime of this pointer + // is bound to SavedModelAPI it was loaded from. + ConcreteFunction* GetSignatureDefFunction(const std::string& function_path, + Status* status); + + // Lists all Conrete Functions available from the SavedModel. + std::vector ListFunctions(); + + // SavedModelAPI is movable, but not copyable. + SavedModelAPI(SavedModelAPI&&) = default; + SavedModelAPI& operator=(SavedModelAPI&&) = default; + + private: + SavedModelAPI(const SavedModelAPI&) = delete; + SavedModelAPI& operator=(const SavedModelAPI&) = delete; + + explicit SavedModelAPI(TF_SavedModel* model) : saved_model_(model) {} + struct TFSavedModelDeleter { + void operator()(TF_SavedModel* p) const { TF_DeleteSavedModel(p); } + }; + std::unique_ptr saved_model_; +}; + +inline std::unique_ptr SavedModelAPI::Load( + const std::string& saved_model_path, const Runtime& runtime, Status* status, + const std::unordered_set* tags) { + TF_SavedModel* saved_model = nullptr; + + if (tags == nullptr) { + saved_model = + TF_LoadSavedModel(saved_model_path.c_str(), runtime.GetTFEContext(), + status->GetTFStatus()); + } else { + std::vector tags_vector; + tags_vector.reserve(tags->size()); + for (const std::string& tag : *tags) { + tags_vector.push_back(tag.c_str()); + } + saved_model = TF_LoadSavedModelWithTags( + saved_model_path.c_str(), runtime.GetTFEContext(), tags_vector.data(), + tags_vector.size(), status->GetTFStatus()); + } + + if (!status->ok()) { + return nullptr; + } + + // We can't use std::make_unique here because of its interaction with a + // private constructor: https://abseil.io/tips/134 + return std::unique_ptr(new SavedModelAPI(saved_model)); +} + +inline ConcreteFunction* SavedModelAPI::GetConcreteFunction( + const std::string& function_path, Status* status) { + TF_ConcreteFunction* function = TF_GetSavedModelConcreteFunction( + saved_model_.get(), function_path.c_str(), status->GetTFStatus()); + if (!status->ok()) { + return nullptr; + } + return ConcreteFunction::wrap(function); +} + +inline ConcreteFunction* SavedModelAPI::GetSignatureDefFunction( + const std::string& function_path, Status* status) { + TF_ConcreteFunction* function = TF_GetSavedModelSignatureDefFunction( + saved_model_.get(), function_path.c_str(), status->GetTFStatus()); + if (!status->ok()) { + return nullptr; + } + return ConcreteFunction::wrap(function); +} + +inline std::vector SavedModelAPI::ListFunctions() { + ConcreteFunctionList list(TF_ListSavedModelFunctions(saved_model_.get())); + return list.ToVector(); +} + +} // namespace cc +} // namespace experimental +} // namespace tensorflow + +#endif // TENSORFLOW_CC_SAVED_MODEL_EXPERIMENTAL_PUBLIC_SAVED_MODEL_API_H_ diff 
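The TODO above asks for an end-to-end example. A minimal sketch of the intended flow, stopping short of execution since ConcreteFunction::Run is not part of this change; the export directory and the "serving_fn" function path are placeholders:

    #include <memory>
    #include <string>

    #include "tensorflow/cc/experimental/base/public/runtime.h"
    #include "tensorflow/cc/experimental/base/public/runtime_builder.h"
    #include "tensorflow/cc/experimental/base/public/status.h"
    #include "tensorflow/cc/saved_model/experimental/public/concrete_function.h"
    #include "tensorflow/cc/saved_model/experimental/public/saved_model_api.h"

    namespace cc = tensorflow::experimental::cc;

    bool LoadAndLookUp(const std::string& export_dir) {
      cc::Status status;
      cc::RuntimeBuilder builder;
      std::unique_ptr<cc::Runtime> runtime = builder.Build(&status);
      if (!status.ok()) return false;

      // TF2 SavedModels exported by tf.saved_model.save have a single
      // metagraph, so no tags are needed; pass a tag set for TF1-style models.
      std::unique_ptr<cc::SavedModelAPI> model =
          cc::SavedModelAPI::Load(export_dir, *runtime, &status);
      if (!status.ok()) return false;

      // "serving_fn" is a placeholder function path.
      cc::ConcreteFunction* fn =
          model->GetConcreteFunction("serving_fn", &status);
      return status.ok() && fn != nullptr;
    }

As the tests below note, loading currently returns TF_UNIMPLEMENTED, so this illustrates the intended call pattern rather than a path that succeeds today.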
--git a/tensorflow/cc/saved_model/experimental/tests/BUILD b/tensorflow/cc/saved_model/experimental/tests/BUILD new file mode 100644 index 00000000000..f24bcfdee2a --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/tests/BUILD @@ -0,0 +1,22 @@ +# Tests for the C++ header-only SavedModelAPI. +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +package( + licenses = ["notice"], # Apache 2.0 +) + +tf_cc_test( + name = "saved_model_api_test", + srcs = [ + "saved_model_api_test.cc", + ], + deps = [ + "//tensorflow/cc/experimental/base/public:runtime", + "//tensorflow/cc/experimental/base/public:runtime_builder", + "//tensorflow/cc/experimental/base/public:status", + "//tensorflow/cc/saved_model/experimental/public:saved_model_api", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc new file mode 100644 index 00000000000..7f7f6b09a6d --- /dev/null +++ b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/saved_model/experimental/public/saved_model_api.h" + +#include +#include +#include + +#include "tensorflow/cc/experimental/base/public/runtime.h" +#include "tensorflow/cc/experimental/base/public/runtime_builder.h" +#include "tensorflow/cc/experimental/base/public/status.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/stringpiece.h" +#include "tensorflow/core/platform/test.h" + + +namespace { + +using tensorflow::experimental::cc::Runtime; +using tensorflow::experimental::cc::RuntimeBuilder; +using tensorflow::experimental::cc::SavedModelAPI; +using tensorflow::experimental::cc::Status; + +constexpr char kTestData[] = "cc/saved_model/testdata"; + +std::string SavedModelPath(tensorflow::StringPiece saved_model_dir) { + return tensorflow::io::JoinPath(tensorflow::testing::TensorFlowSrcRoot(), + kTestData, saved_model_dir); +} + +// This value parameterized test allows us to test both TFRT +// and non TFRT runtimes. +// https://github.com/google/googletest/blob/dcc92d0ab6c4ce022162a23566d44f673251eee4/googletest/docs/advanced.md#value-parameterized-tests +class CPPSavedModelAPITest : public ::testing::TestWithParam {}; + +TEST_P(CPPSavedModelAPITest, LoadsSavedModelWithTags) { + Status status; + RuntimeBuilder builder; + bool use_tfrt = GetParam(); + if (use_tfrt) { + GTEST_SKIP(); // TODO(chky) : Enable this once TFRT is open sourced. 
+ } + + builder.SetUseTFRT(use_tfrt); + std::unique_ptr runtime = builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + std::string model_dir = SavedModelPath("VarsAndArithmeticObjectGraph"); + std::unordered_set tags = {"serve"}; + std::unique_ptr model = + SavedModelAPI::Load(model_dir, *runtime, &status, &tags); + + // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. + // That unblocks writing other tests that require a TF_SavedModel*, + // like loading a ConcreteFunction. This test at least checks that the + // C API builds and can be minimally run. + EXPECT_EQ(status.code(), TF_UNIMPLEMENTED); +} + +TEST_P(CPPSavedModelAPITest, LoadsSavedModel) { + Status status; + RuntimeBuilder builder; + bool use_tfrt = GetParam(); + if (use_tfrt) { + GTEST_SKIP(); // TODO(chky) : Enable this once TFRT is open sourced. + } + + builder.SetUseTFRT(use_tfrt); + std::unique_ptr runtime = builder.Build(&status); + ASSERT_TRUE(status.ok()) << status.message(); + + std::string model_dir = SavedModelPath("VarsAndArithmeticObjectGraph"); + std::unique_ptr model = + SavedModelAPI::Load(model_dir, *runtime, &status); + + // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. + // That unblocks writing other tests that require a TF_SavedModel*, + // like loading a ConcreteFunction. This test at least checks that the + // C API builds and can be minimally run. + EXPECT_EQ(status.code(), TF_UNIMPLEMENTED); +} + +INSTANTIATE_TEST_SUITE_P(RuntimeAgnosticCPPSavedModelTests, + CPPSavedModelAPITest, ::testing::Bool()); + +} // namespace + diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index c9a36b88795..e4df3090046 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -131,6 +131,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, TF_RETURN_IF_ERROR(XLATypeToCpp(shape.element_type(), &type)); std::vector dim_vars; string dim_sizes, indices; + int count = 1; if (shape.rank() == 0 || (shape.dimensions_size() == 1 && shape.dimensions(0) == 1)) { dim_sizes = "[1]"; @@ -140,6 +141,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, dim_vars.push_back(absl::StrCat("size_t dim", dim)); dim_sizes += absl::StrCat("[", shape.dimensions(dim), "]"); indices += absl::StrCat("[dim", dim, "]"); + count *= shape.dimensions(dim); } } rewrites->push_back({"{{I}}", absl::StrCat(i)}); @@ -147,6 +149,7 @@ Status AddRewritesForShape(int i, const xla::Shape& shape, rewrites->push_back({"{{DIM_VARS}}", absl::StrJoin(dim_vars, ", ")}); rewrites->push_back({"{{DIM_SIZES}}", dim_sizes}); rewrites->push_back({"{{INDICES}}", indices}); + rewrites->push_back({"{{COUNT}}", absl::StrCat(count)}); return Status::OK(); } @@ -199,6 +202,12 @@ Status GenArgMethods(const tf2xla::Config& config, return (*static_cast( arg_data({{I}}))){{INDICES}}; } + int arg{{NAME}}_size() const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int arg{{NAME}}_count() const { + return {{COUNT}}; + } )"; *methods += RewriteWithName(absl::StrCat(i), code, rewrites); if (!config.feed(i).name().empty()) { @@ -246,6 +255,12 @@ Status GenResultMethods(const tf2xla::Config& config, return (*static_cast( result_data({{I}}))){{INDICES}}; } + int result{{NAME}}_size() const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int result{{NAME}}_count() const { + return {{COUNT}}; + } )"; *methods += RewriteWithName(absl::StrCat(i), code, rewrites); if (!config.fetch(i).name().empty()) { @@ -281,6 +296,12 @@ Status 
GenVariableMethods(const tf2xla::Config& config, return (*static_cast( arg_data({{I}}))){{INDICES}}; } + int var_{{NAME}}_size() const { + return {{COUNT}} * sizeof({{TYPE}}); + } + int var_{{NAME}}_count() const { + return {{COUNT}}; + } )"; const tf2xla::Variable& var = config.variable(i - config.feed_size()); rewrites.emplace_back("{{MAYBE_CONST}}", var.readonly() ? "const " : ""); diff --git a/tensorflow/compiler/aot/codegen_test_h.golden b/tensorflow/compiler/aot/codegen_test_h.golden index af58ca233f0..d011279dbb7 100644 --- a/tensorflow/compiler/aot/codegen_test_h.golden +++ b/tensorflow/compiler/aot/codegen_test_h.golden @@ -138,6 +138,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(0)))[dim0][dim1]; } + int arg0_size() const { + return 2 * sizeof(float); + } + int arg0_count() const { + return 2; + } void set_arg_myfeed_data(const void* data) { set_arg_data(0, data); @@ -156,6 +162,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(0)))[dim0][dim1]; } + int arg_myfeed_size() const { + return 2 * sizeof(float); + } + int arg_myfeed_count() const { + return 2; + } void set_arg1_data(const void* data) { set_arg_data(1, data); @@ -174,6 +186,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(1)))[dim0][dim1]; } + int arg1_size() const { + return 12 * sizeof(tensorflow::int64); + } + int arg1_count() const { + return 12; + } // Result methods for managing output buffers. Buffers are in row-major order. // Must only be called after a successful Run call. There is a set of methods @@ -204,6 +222,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( result_data(0)))[dim0][dim1]; } + int result0_size() const { + return 30 * sizeof(tensorflow::uint32); + } + int result0_count() const { + return 30; + } tensorflow::uint32* result_myfetch_data() { return static_cast(result_data(0)); @@ -219,6 +243,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( result_data(0)))[dim0][dim1]; } + int result_myfetch_size() const { + return 30 * sizeof(tensorflow::uint32); + } + int result_myfetch_count() const { + return 30; + } // Methods for managing variable buffers. Buffers are in row-major order. // @@ -261,6 +291,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(2)))[0]; } + int var_myvar_readonly_size() const { + return 1 * sizeof(float); + } + int var_myvar_readonly_count() const { + return 1; + } void set_var_myvar_data(float* data) { set_arg_data(3, data); @@ -279,6 +315,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(3)))[0]; } + int var_myvar_size() const { + return 1 * sizeof(float); + } + int var_myvar_count() const { + return 1; + } void set_var_myvar2_data(tensorflow::int32* data) { set_arg_data(4, data); @@ -297,6 +339,12 @@ class MyClass final : public tensorflow::XlaCompiledCpuFunction { return (*static_cast( arg_data(4)))[dim0]; } + int var_myvar2_size() const { + return 5 * sizeof(tensorflow::int32); + } + int var_myvar2_count() const { + return 5; + } private: // Number of buffers for the compiled computation. 
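The golden file shows the shape of the new accessors. A caller sizing buffers against the generated class might use them as below; this is a sketch assuming the MyClass generated for this test, and assuming its existing interface also exposes the arg_myfeed_data() pointer accessor alongside the setters shown above:

    #include <cstring>
    #include <vector>

    // Assumes the tfcompile-generated header declaring MyClass is included.
    // Copies a host vector into the generated computation's feed buffer,
    // using the new count/size accessors to validate and size the copy.
    void FillMyFeed(MyClass& computation, const std::vector<float>& values) {
      if (static_cast<int>(values.size()) != computation.arg_myfeed_count()) {
        return;  // Shape mismatch; arg_myfeed_count() is the element count.
      }
      // arg_myfeed_size() is the same quantity expressed in bytes.
      std::memcpy(computation.arg_myfeed_data(), values.data(),
                  computation.arg_myfeed_size());
    }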
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 28d922f9e3c..bc8fac0e88f 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -251,7 +251,7 @@ cc_library( visibility = [":friends"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:portable_tensorflow_lib", ], "//conditions:default": [ "//tensorflow/core:graph", diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index ff76786a66f..174250f18bd 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -2078,6 +2078,8 @@ absl::flat_hash_set GetKnownXLAWhitelistOp() { "XlaSend", "XlaSharding", "XlaSort", + "XlaSpmdFullToShardShape", + "XlaSpmdShardToFullShape", "XlaSvd", "XlaWhile", "_Arg", diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index c90e8dead76..62b0c0ab4cf 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -41,6 +41,7 @@ limitations under the License. #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/public/version.h" @@ -277,29 +278,25 @@ Status XlaCompilationCache::CompileSingleOp( const NodeDef& node_def = ctx->op_kernel().def(); TF_ASSIGN_OR_RETURN(auto graph, CreateGraph(node_def, args, result_dtypes)); - bool are_params = absl::c_all_of(args, [](const XlaCompiler::Argument arg) { - return arg.kind == XlaCompiler::Argument::kParameter; - }); + bool are_args_supported = + absl::c_all_of(args, [](const XlaCompiler::Argument arg) { + return arg.kind == XlaCompiler::Argument::kConstant || + arg.kind == XlaCompiler::Argument::kParameter; + }); const ConfigProto* config = ctx->function_library()->config_proto(); bool use_mlir = config && config->experimental().enable_mlir_bridge(); - // Use MLIR bridge if all the arguments are parameters. - // TODO(hinsu): Support other argument types instead of silently falling - // back to the XLA compiler. - if (!are_params || !use_mlir) { + // TODO(b/155596779): Understand the source of other argument types and + // depending on the source either support those or avoid these codepath. 
+ if (!use_mlir || !are_args_supported) { return compiler->CompileGraph(compile_options, node_def.name(), std::move(graph), args, result); } - absl::InlinedVector arg_shapes; - arg_shapes.reserve(args.size()); - for (const XlaCompiler::Argument& arg : args) { - arg_shapes.push_back(absl::get(arg.shape)); - } GraphDebugInfo debug_info; return CompileGraphToXlaHlo( - *graph, {arg_shapes.data(), arg_shapes.size()}, - options.device_type.type_string(), compile_options.use_tuple_arg, - *options.flib_def, debug_info, options.shape_representation_fn, result); + *graph, {args.data(), args.size()}, options.device_type.type_string(), + compile_options.use_tuple_arg, *options.flib_def, debug_info, + options.shape_representation_fn, result); }; return CompileImpl(options, name, args, compile_op, /*compile_threshold=*/absl::nullopt, diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 34ff0c55615..17e4226405a 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -180,12 +180,10 @@ class XlaAssignVariableOp : public OpKernel { data::MakeIteratorOp); \ REGISTER_KERNEL_BUILDER(Name("AnonymousIterator").Device(DEVICE), \ data::AnonymousIteratorHandleOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("AnonymousIteratorV2").Device(DEVICE).HostMemory("deleter"), \ - data::AnonymousIteratorHandleOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("DeleteIterator").Device(DEVICE).HostMemory("deleter"), \ - data::DeleteIteratorOp); \ + REGISTER_KERNEL_BUILDER(Name("AnonymousIteratorV2").Device(DEVICE), \ + data::AnonymousIteratorHandleOp); \ + REGISTER_KERNEL_BUILDER(Name("DeleteIterator").Device(DEVICE), \ + data::DeleteIteratorOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE), \ data::IteratorGetNextOp); \ REGISTER_KERNEL_BUILDER(Name("IteratorGetNextAsOptional").Device(DEVICE), \ diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index cb23137e7fe..9b5b0c209e5 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -31,7 +31,7 @@ filegroup( "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -523,7 +523,6 @@ cc_library( "@flatbuffers", "@llvm-project//llvm:analysis", "@llvm-project//llvm:support", - "@llvm-project//mlir:AllPassesAndDialects", "@llvm-project//mlir:IR", "@llvm-project//mlir:TransformUtils", ], @@ -696,9 +695,9 @@ cc_library( "@com_google_absl//absl/strings", "@llvm-project//llvm:support", "@llvm-project//mlir:IR", - "@llvm-project//mlir:LoopOpsTransforms", "@llvm-project//mlir:MlirTranslateMain", "@llvm-project//mlir:QuantOps", + "@llvm-project//mlir:SCFTransforms", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:Translation", @@ -710,6 +709,8 @@ tf_cc_binary( name = "flatbuffer_translate", deps = [ ":flatbuffer_translate_registeration", + # TODO(b/155809683): Link only necessary dialects. + "@llvm-project//mlir:AllPassesAndDialects", ], ) @@ -758,6 +759,13 @@ tf_cc_binary( ":tf_tfl_passes", ":tf_tfl_translate_cl_options", ":tf_to_tfl_flatbuffer", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + # TODO(b/155809683): Link only necessary dialects. 
+ "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", "//tensorflow/compiler/mlir:init_mlir", "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", "//tensorflow/core:protos_all_cc", @@ -765,11 +773,6 @@ tf_cc_binary( "//tensorflow/lite:framework", "//tensorflow/lite/schema:schema_fbs", "//tensorflow/stream_executor/lib", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", ], ) @@ -781,17 +784,19 @@ tf_cc_binary( deps = [ ":flatbuffer_translate_lib", ":flatbuffer_translate_registeration", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + # TODO(b/155809683): Link only necessary dialects. + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Support", "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", "//tensorflow/core:lib", "//tensorflow/core/platform:logging", "//tensorflow/lite:framework", "//tensorflow/lite/delegates/flex:delegate", "//tensorflow/lite/kernels:builtin_ops", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Parser", - "@llvm-project//mlir:Support", ], ) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index e9192388070..6a631b1433d 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1020,7 +1020,7 @@ Optional> Translator::BuildOperator( if (!inst->getMutableAttrDict().getAttrs().empty()) { os << " {"; bool first = true; - for (auto& named_attr : inst->getMutableAttrDict().getDictionary()) { + for (auto& named_attr : inst->getAttrDictionary()) { os << (!first ? ", " : ""); first = false; named_attr.first.print(os); diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 93bf1dcde53..a585b8e1520 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -20,7 +20,7 @@ limitations under the License. 
include "mlir/IR/OpBase.td" include "mlir/Interfaces/LoopLikeInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td" include "tensorflow/compiler/mlir/lite/quantization/quantization.td" @@ -247,7 +247,14 @@ class TFL_TFTypesWithSameBits : Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isa()">, CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isUnsignedInteger(" # num # ")">]>]>; -class TFL_OperandIsNoneOrHasRankLessThanOrEqualTo : +class TFL_TFOperandTypesWithSameBits : + And<[ + Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # i # ")).isa()">, + CPred<"getElementTypeOrSelf($_op.getOperand(" # i # ")).isUnsignedInteger(" # num # ")">]>, + Or<[CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isa()">, + CPred<"getElementTypeOrSelf($_op.getOperand(" # j # ")).isUnsignedInteger(" # num # ")">]>]>; + +class TFL_OperandIsNoneOrHasRankAtMost : PredOpTrait<"operand " # n # " is at most " # m # "-D", Or<[ CPred<"$_op.getOperand(" # n # ").getType().isa()">, @@ -255,13 +262,13 @@ class TFL_OperandIsNoneOrHasRankLessThanOrEqualTo : CPred<"$_op.getOperand(" # n # ").getType().cast().getRank() <= " # m>]>>; -class TFL_OperandHasRankLessThanOrEqualTo : +class TFL_OperandHasRankAtMost : PredOpTrait<"operand " # n # " is at most " # m # "-D", Or<[TFL_OperandIsUnrankedPred, CPred<"$_op.getOperand(" # n # ").getType().cast().getRank() <= " # m>]>>; -class TFL_OperandHasRankGreaterThanOrEqualTo : +class TFL_OperandHasRankAtLeast : PredOpTrait<"operand " # n # " is at least " # m # "-D", Or<[TFL_OperandIsUnrankedPred, CPred<"$_op.getOperand(" # n # @@ -300,6 +307,18 @@ class TFL_TCresVTEtIsSameAsOp : And<[ "quant::QuantizedType::castToStorageType(" "getElementTypeOrSelf($_op.getOperand(" # j # ")))">]>]>]>; +// This is a quantization-aware version of TCresVTEtIsSameAsOp +class TFL_TCopVTEtAreSameAt : Or<[ + TCopVTEtAreSameAt<[i, j]>, + TFL_TFOperandTypesWithSameBits, + And<[ + SubstLeaves<"$_self", "getElementTypeOrSelf($_op.getOperand(" # j # "))", + quant_QuantizedType.predicate>, + CPred<"quant::QuantizedType::castToStorageType(" + "getElementTypeOrSelf($_op.getOperand(" # i # "))) == " + "quant::QuantizedType::castToStorageType(" + "getElementTypeOrSelf($_op.getOperand(" # j # ")))">]>]>; + //===----------------------------------------------------------------------===// // TFL op common constraints. //===----------------------------------------------------------------------===// @@ -395,9 +414,9 @@ class TFL_ConvOp : }]; let arguments = ( - ins TFL_TensorOf<[F32, QI8, QUI8]>:$input, + ins TFL_TensorOf<[F32, QI8, QUI8, QI16]>:$input, TFL_TensorOf<[F32, QI8, QUI8]>:$filter, - TFL_TensorOfOrNone<[F32, I32]>:$bias, + TFL_TensorOfOrNone<[F32, I32, I64]>:$bias, I32Attr:$dilation_h_factor, I32Attr:$dilation_w_factor, TFL_AFAttr:$fused_activation_function, @@ -406,7 +425,7 @@ class TFL_ConvOp : I32Attr:$stride_w ); - let results = (outs TFL_TensorOf<[F32, QI8, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, QI8, QUI8, QI16]>:$output); let hasOptions = 0b1; } @@ -846,6 +865,40 @@ def TFL_FullyConnectedOp : TFL_Op<"fully_connected", [ }]; } +def TFL_BatchMatMulOp : TFL_Op<"batch_matmul", [ + NoSideEffect, + TFL_OperandHasAtleastRank<0, 2>, + TFL_OperandHasAtleastRank<1, 2>, + SameOperandsAndResultElementType]> { + + let summary = "Batch Matrix Multiply Operator"; + + let description = [{ +Performs a batched matrix multiplication on the inputs. 
Follows the +conventions of TensorFlow BatchMatMulV2, with support for unknown dimensions +in the batch dimensions and broadcasting. + + Inputs: + `inputs[0]`: required: input LHS + `inputs[1]`: required: input RHS + `adjoint_lhs`: optional: Transpose LHS (default false) + `adjoint_lhs`: optional: Transpose LHS (default false) + }]; + + let arguments = (ins + TFL_TensorOf<[F32]>:$x, + TFL_TensorOf<[F32]>:$y, + DefaultValuedAttr:$adj_x, + DefaultValuedAttr:$adj_y + ); + + let results = (outs + TFL_TensorOf<[F32]>:$output + ); + + let hasOptions = 1; +} + def TFL_GatherOp : TFL_Op<"gather", [ NoSideEffect, SameOperandsAndResultsScale, @@ -929,7 +982,11 @@ def TFL_ScatterNdOp : TFL_Op<"scatter_nd", [ // Same type check of lhs and rhs is handled by the ResultsBroadcastableShape trait. def TFL_LessEqualOp : TFL_Op<"less_equal", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Less_equal operator"; let description = [{ @@ -937,8 +994,8 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [ }]; let arguments = ( - ins TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$lhs, - TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QI8, QUI8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QI8, QUI8]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -951,9 +1008,12 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [ let hasOptions = 0; } -def TFL_LocalResponseNormalizationOp : TFL_Op<"local_response_normalization", - [NoSideEffect]> { - let summary = "Local Response Normalization."; +def TFL_LocalResponseNormalizationOp : TFL_Op<"local_response_normalization", [ + TFL_OperandHasRank<0, 4>, + SameOperandsAndResultShape, + SameOperandsAndResultType, + NoSideEffect]> { + let summary = "Local Response Normalization."; let description = [{ The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last @@ -970,7 +1030,7 @@ convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imag }]; let arguments = (ins - TFL_TensorOf<[F32, QI8, QUI8]>:$input, + TFL_FpTensor:$input, I32Attr:$radius, F32Attr:$bias, F32Attr:$alpha, @@ -978,7 +1038,7 @@ convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imag ); let results = (outs - TFL_TensorOf<[F32, QI8, QUI8]>:$output + TFL_FpTensor:$output ); let hasOptions = 1; @@ -1014,7 +1074,7 @@ def TFL_MatrixDiagOp : TFL_Op<"matrix_diag", [ NoSideEffect, TFL_OperandHasAtleastRank<0, 1>, PredOpTrait<"operand and result must have the same element type", - TCresVTEtIsSameAsOp<0, 0>>]> { + TFL_TCresVTEtIsSameAsOp<0, 0>>]> { let summary = [{ Returns a tensor with the provided diagonal and everything else padded with zeros. 
}]; @@ -1027,17 +1087,21 @@ def TFL_MatrixDiagOp : TFL_Op<"matrix_diag", [ }]; let arguments = (ins - TFL_TensorOf<[F32, I8, I64, I32, TFL_Uint8]>:$diagonal + TFL_TensorOf<[F32, I8, I16, I32, I64, TFL_Uint8, QUI8, QI8, TFL_Quint8]>:$diagonal ); let results = (outs - TFL_TensorOf<[F32, I8, I64, I32, TFL_Uint8]>:$output + TFL_TensorOf<[F32, I8, I16, I32, I64, TFL_Uint8, QUI8, QI8, TFL_Quint8]>:$output ); let hasOptions = 0; } -def TFL_MatrixSetDiagOp : TFL_Op<"matrix_set_diag", [NoSideEffect]> { +def TFL_MatrixSetDiagOp : TFL_Op<"matrix_set_diag", [ + TFL_OperandHasAtleastRank<0, 2>, + PredOpTrait<"input and result must have the same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect]> { let summary = [{ Returns a batched matrix tensor with new batched diagonal values. }]; @@ -1049,12 +1113,12 @@ innermost matrices. These will be overwritten by the values in `diagonal`. }]; let arguments = (ins - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$input, - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$diagonal + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$input, + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$diagonal ); let results = (outs - TensorOf<[F32, I32, I64, I8, QI8, QI16, QUI8, TFL_Uint8, TFL_Quint8]>:$output + TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QI16, QUI8, TFL_Quint8]>:$result ); let hasOptions = 0; @@ -1172,7 +1236,12 @@ larger than 0. } def TFL_NotEqualOp : TFL_Op<"not_equal", [ - ResultsBroadcastableShape, Commutative, NoSideEffect, NoQuantizableResult]> { + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + BinaryOpSameElementTypeConstraint, + ResultsBroadcastableShape, + Commutative, + NoSideEffect, + NoQuantizableResult]> { let summary = "Not_equal operator"; let description = [{ @@ -1180,8 +1249,8 @@ def TFL_NotEqualOp : TFL_Op<"not_equal", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[I1, F32, I32, I64, QUI8, QI8, TFL_Quint8, TFL_Str]>:$lhs, + TFL_TensorOf<[I1, F32, I32, I64, QUI8, QI8, TFL_Quint8, TFL_Str]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -1250,7 +1319,7 @@ def TFL_EmbeddingLookupOp: TFL_Op<"embedding_lookup", PredOpTrait<"value and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 1>>, TFL_OperandHasRank<0, 1>, - TFL_OperandHasRankGreaterThanOrEqualTo<1, 2> + TFL_OperandHasRankAtLeast<1, 2> ]> { let summary = "Embedding lookup operator"; @@ -1468,7 +1537,11 @@ def TFL_FloorModOp : TFL_Op<"floor_mod", [ } def TFL_GreaterOp : TFL_Op<"greater", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Greater operator"; let description = [{ @@ -1476,10 +1549,10 @@ def TFL_GreaterOp : TFL_Op<"greater", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$rhs); - let results = (outs AnyTensor:$output); + let results = (outs TFL_BoolTensor:$output); let builders = [TFL_ComparisonBinaryBuilder]; @@ -1488,9 +1561,12 @@ def TFL_GreaterOp : TFL_Op<"greater", [ let printer = [{ return mlir::impl::printOneResultOp(getOperation(), p); }]; } -def TFL_HardSwishOp: TFL_Op<"hard_swish", [NoSideEffect, - SameOperandsAndResultShape, - TFL_GpuTargetOp]> { +def 
TFL_HardSwishOp: TFL_Op<"hard_swish", [ + NoSideEffect, + SameOperandsAndResultShape, + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + TFL_GpuTargetOp]> { let summary = "Hardswish activation function."; let description = [{ Computes hard-swish activation function @@ -1500,7 +1576,7 @@ def TFL_HardSwishOp: TFL_Op<"hard_swish", [NoSideEffect, let arguments = (ins TFL_TensorOf<[F32, QUI8, QI8]>:$input); - let results = (outs TFL_TensorOf<[F32, QUI8, QI8]>:$out); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8]>:$output); let hasOptions = 0; } @@ -1529,29 +1605,35 @@ def TFL_L2NormalizationOp : TFL_Op<"l2_normalization", [NoSideEffect, let customOption = "L2NormOptions"; } -def TFL_LeakyReluOp: TFL_Op<"leaky_relu", [NoSideEffect, SameOperandsAndResultType]> { +def TFL_LeakyReluOp: TFL_Op<"leaky_relu", [ + SameOperandsAndResultShape, + NoSideEffect, + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>]> { let summary = "Leaky Relu operator"; - // TODO(jpienaar): Add type restriction. This op is only defined for - // restricted (floating point) types. let description = [{ Element-wise Leaky ReLU operator x -> x >= 0 ? x : (alpha * x) }]; let arguments = ( - ins AnyTensor:$input, + ins TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$input, // Slope of the activation function at x < 0. F32Attr:$alpha ); - let results = (outs AnyTensor:$output); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$output); let hasOptions = 0b1; } def TFL_LessOp : TFL_Op<"less", [ - ResultsBroadcastableShape, NoSideEffect, NoQuantizableResult]> { + ResultsBroadcastableShape, + BinaryOpSameElementTypeConstraint, + TFL_BinaryOperandsHaveSameShapesOrBroadcastableShape<0, 1, 4>, + NoSideEffect, + NoQuantizableResult]> { let summary = "Less operator"; let description = [{ @@ -1559,8 +1641,8 @@ def TFL_LessOp : TFL_Op<"less", [ }]; let arguments = ( - ins AnyTensor:$lhs, - AnyTensor:$rhs); + ins TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$lhs, + TFL_TensorOf<[F32, I32, I64, QUI8, QI8, TFL_Quint8]>:$rhs); let results = (outs TFL_BoolTensor:$output); @@ -1621,6 +1703,8 @@ def TFL_LogicalOrOp : TFL_Op<"logical_or", [NoSideEffect]> { def TFL_LogisticOp: TFL_Op<"logistic", [ NoSideEffect, + PredOpTrait<"x and y must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, SameOperandsAndResultShape, // zero_point = 0 // scale = 1. / (max_value + 1) @@ -1633,9 +1717,9 @@ def TFL_LogisticOp: TFL_Op<"logistic", [ Computes element-wise Sigmoid of input }]; - let arguments = (ins TFL_TensorOf<[F32, QI8, QUI8, QI16, QUI16]>:$x); + let arguments = (ins TFL_TensorOf<[F32, QI8, QUI8, QI16, TFL_Quint8]>:$x); - let results = (outs TFL_TensorOf<[F32, QI8, QUI8, QI16, QUI16]>:$y); + let results = (outs TFL_TensorOf<[F32, QI8, QUI8, QI16, TFL_Quint8]>:$y); } def TFL_LogOp: TFL_Op<"log", [ @@ -1656,10 +1740,11 @@ def TFL_LogOp: TFL_Op<"log", [ let hasFolder = 1; } -// TODO(b/130643170): Adds some constraint for the input/output element types. 
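The FixedResultScale constraints on ops such as tfl.logistic pin the output to the fixed quantization parameters spelled out in the comment above (zero_point = 0 and scale = 1 / (max_value + 1) for unsigned 8-bit storage). The standalone sketch below reproduces that arithmetic; the helper name and the signed-storage variant (same scale, zero point shifted to the storage minimum) are illustrative assumptions, not TensorFlow APIs.

#include <cstdio>

// Fixed sigmoid output quantization, assuming 8-bit storage. Outputs lie in
// [0, 1), so spreading that range over 2^8 steps gives scale = 1/256, and the
// real value 0.0 is placed at the low end of the storage range.
struct QuantParams {
  double scale;
  int zero_point;
};

QuantParams FixedSigmoidOutputParams(bool signed_storage) {
  QuantParams p;
  p.scale = 1.0 / 256.0;                     // 1 / (max_value + 1), max_value = 255
  p.zero_point = signed_storage ? -128 : 0;  // real 0.0 maps to the storage minimum
  return p;
}

int main() {
  QuantParams u = FixedSigmoidOutputParams(false);
  QuantParams s = FixedSigmoidOutputParams(true);
  std::printf("uint8: scale=%g zp=%d\n", u.scale, u.zero_point);
  std::printf("int8:  scale=%g zp=%d\n", s.scale, s.zero_point);
  return 0;
}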
def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ NoSideEffect, SameOperandsAndResultShape, + PredOpTrait<"x and y must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, // zero_point = max_value // scale = -log_softmax_output_min / (max_value + 1) FixedResultScale>, @@ -1672,9 +1757,9 @@ def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ input - log(reduce_sum(exp(input), dim)) }]; - let arguments = (ins AnyTensor:$input); + let arguments = (ins TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$input); - let results = (outs AnyTensor:$output); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$output); let hasOptions = 1; } @@ -1693,6 +1778,9 @@ def MaxPoolOperandAndResultConstraints : PredOpTrait<"MaxPool2D operand and " TFL_TCresVTEtIsSameAsOp<0, 0>]>>; def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ + TFL_OperandHasRank<0, 4>, + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, MaxPoolOperandAndResultConstraints, SameOperandsAndResultsScale, @@ -1707,7 +1795,7 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ }]; let arguments = ( - ins AnyTensor:$input, + ins TFL_TensorOf<[F32, QUI8, QI8, QI16, TFL_Quint8]>:$input, TFL_PaddingAttr:$padding, I32Attr:$stride_w, I32Attr:$stride_h, @@ -1716,7 +1804,7 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ TFL_AFAttr:$fused_activation_function ); - let results = (outs AnyTensor:$output); + let results = (outs TFL_TensorOf<[F32, QUI8, QI8, QI16, TFL_Quint8]>:$output); let hasOptions = 1; @@ -1748,7 +1836,11 @@ def TFL_MaximumOp : TFL_Op<"maximum", [ let hasOptions = 0; } -def TFL_MeanOp : TFL_Op<"mean", [NoSideEffect, TFL_GpuTargetOp]> { +def TFL_MeanOp : TFL_Op<"mean", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect, + TFL_GpuTargetOp]> { let summary = "Mean operator"; let description = [{ @@ -1760,13 +1852,13 @@ def TFL_MeanOp : TFL_Op<"mean", [NoSideEffect, TFL_GpuTargetOp]> { }]; let arguments = (ins - TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8, TFL_Uint8]>:$input, + TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Uint8]>:$input, TFL_TensorOf<[I32, I64]>:$axis, BoolAttr:$keep_dims ); let results = (outs - TFL_TensorOf<[F32, I32, I64, I8, QI8, QUI8, TFL_Uint8]>:$output); + TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Uint8]>:$output); let hasOptions = 1; let customOption = "ReducerOptions"; @@ -1787,14 +1879,14 @@ def TFL_OneHotOp : TFL_Op<"one_hot", [NoSideEffect]> { let arguments = (ins TFL_TensorOf<[I32, I64]>:$indices, TFL_I32Tensor:$depth, - TFL_TensorOf<[F32, I32, I64, I1]>:$on_value, - TFL_TensorOf<[F32, I32, I64, I1]>:$off_value, + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$on_value, + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$off_value, I32Attr:$axis ); let results = (outs - TFL_TensorOf<[F32, I32, I64, I1]>:$output + TFL_TensorOf<[F32, I32, I64, I1, I8, UI8]>:$output ); let hasOptions = 1; @@ -1808,11 +1900,11 @@ Rounds the values of a tensor to the nearest integer, element-wise. 
}]; let arguments = (ins - TFL_TensorOf<[F32]>:$x + TFL_FpTensor:$x ); let results = (outs - TFL_TensorOf<[F32]>:$y + TFL_FpTensor:$y ); } @@ -1998,7 +2090,11 @@ def TFL_NegOp: TFL_Op<"neg", [NoSideEffect, SameOperandsAndResultType]> { let hasFolder = 1; } -def TFL_PackOp : TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { +def TFL_PackOp : TFL_Op<"pack", [ + PredOpTrait<"values and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoSideEffect, + SameOperandsAndResultsScale]> { let summary = "Packs a list of tensors along a dimension into one tensor"; let description = [{ @@ -2029,14 +2125,14 @@ def TFL_PackOp : TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { }]; let arguments = (ins - TFL_VariadicTensorOf<[F32, I8, I16, I32, I64, QI8, QUI8, QI16]>:$values, + TFL_VariadicTensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QUI8, QI16, TFL_Quint8]>:$values, - I32Attr:$values_count, + Confined:$values_count, I32Attr:$axis ); let results = (outs - TFL_TensorOf<[F32, I8, I16, I32, I64, QI8, QUI8, QI16]>:$output + TFL_TensorOf<[F32, I8, I16, I32, I64, UI8, QI8, QUI8, QI16, TFL_Quint8]>:$output ); let verifier = [{ return Verify(*this); }]; @@ -2047,8 +2143,11 @@ def TFL_PackOp : TFL_Op<"pack", [NoSideEffect, SameOperandsAndResultsScale]> { } def TFL_PadOp : TFL_Op<"pad", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultsScale, + TFL_OperandHasRankAtMost<0, 4>, TFL_OperandHasRank<1, 2>, TFL_OperandRankEquals1DimOfOperand<0, 1>, TFL_GpuTargetOp]> { @@ -2079,22 +2178,25 @@ def TFL_PadOp : TFL_Op<"pad", [ ``` }]; - let arguments = (ins TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$input, + let arguments = (ins TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Quint8]>:$input, TFL_I32OrI64Tensor:$padding); - let results = (outs TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, I32, I64, QI8, QUI8, TFL_Quint8]>:$output); let hasOptions = 1; } def TFL_PadV2Op : TFL_Op<"padv2", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultsScale, + TFL_OperandHasRankAtMost<0, 4>, TFL_OperandHasRank<1, 2>, TFL_OperandHasRank<2, 0>, TFL_OperandRankEquals1DimOfOperand<0, 1>, PredOpTrait<"input and constant value operands must have same element type", - TCopVTEtAreSameAt<[0, 2]>>]> { + TFL_TCopVTEtAreSameAt<0, 2>>]> { let summary = "Padding operator v2"; let description = [{ @@ -2125,11 +2227,11 @@ def TFL_PadV2Op : TFL_Op<"padv2", [ }]; let arguments = ( - ins TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$input, + ins TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$input, TFL_I32OrI64Tensor:$padding, - TFL_TensorOf<[F32, I8, I32, I64]>:$constant_values); + TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$constant_values); - let results = (outs TFL_TensorOf<[F32, I8, I32, I64, QI8, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, I32, I64, UI8, QI8, QUI8, TFL_Quint8]>:$output); let hasOptions = 1; } @@ -2157,9 +2259,21 @@ def TFL_PowOp : TFL_Op<"pow", [ResultsBroadcastableShape, let builders = [TFL_BroadcastableBinaryBuilder]; } -def TFL_PReluOp : TFL_Op<"prelu", [NoSideEffect, - TFL_GpuTargetOp, - SameOperandsAndResultsScale]> { +def TFL_PReluOp : TFL_Op<"prelu", [ + NoSideEffect, + ResultsBroadcastableShape, + TFL_GpuTargetOp, + TFL_OperandHasRankAtMost<0, 4>, + TFL_OperandHasRankAtMost<1, 4>, + BinaryOpSameElementTypeConstraint, + 
PredOpTrait<"input and output must have the same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + PredOpTrait<"'alpha' should have one less rank than 'input'.", + Or<[TFL_OperandIsUnrankedPred<0>, + TFL_OperandIsUnrankedPred<1>, + CPred<"$_op.getOperand(0).getType().cast().getRank() == " + "$_op.getOperand(1).getType().cast().getRank() " + "+ 1">]>>]> { let summary = "Parameterized Relu operator"; let description = [{ @@ -2172,11 +2286,11 @@ def TFL_PReluOp : TFL_Op<"prelu", [NoSideEffect, }]; let arguments = ( - ins TFL_TensorOf<[F32, QUI8]>:$input, - TFL_TensorOf<[F32, QUI8]>:$alpha + ins TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$input, + TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$alpha ); - let results = (outs TFL_TensorOf<[F32, QUI8]>:$output); + let results = (outs TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$output); let verifier = [{ return Verify(*this); }]; } @@ -2333,9 +2447,9 @@ def TFL_RsqrtOp: TFL_Op<"rsqrt", [NoSideEffect, Computes element-wise reverse square root of input }]; - let arguments = (ins AnyTensor:$x); + let arguments = (ins TFL_FpTensor:$x); - let results = (outs AnyTensor:$y); + let results = (outs TFL_FpTensor:$y); let hasFolder = 1; } @@ -2853,7 +2967,7 @@ def TFL_DepthToSpaceOp: TFL_Op<"depth_to_space", [ SameOperandsAndResultsScale, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - TFL_OperandHasRankLessThanOrEqualTo<0, 4> + TFL_OperandHasRankAtMost<0, 4> ]> { let summary = "DepthToSpace operator"; @@ -2965,7 +3079,8 @@ def TFL_ResizeNearestNeighborOp : TFL_Op<"resize_nearest_neighbor", let arguments = (ins TFL_TensorOf<[F32, I8, TFL_Uint8, QUI8, QI8]>:$input, TFL_TensorOf<[I32]>:$size, - BoolAttr:$align_corners + BoolAttr:$align_corners, + DefaultValuedAttr:$half_pixel_centers ); let results = (outs @@ -3189,7 +3304,7 @@ def TFL_QConstOp : Op:$output); let builders = [OpBuilder< "OpBuilder &, OperationState &state, TypeAttr qtype, Attribute value", @@ -3250,9 +3365,11 @@ def TFL_QuantizeOp: TFL_Op<"quantize", [ let results = (outs AnyTensor:$output); } -def TFL_DensifyOp: TFL_Op<"densify", [NoSideEffect, - SameOperandsAndResultType, - NoQuantizableResult]> { +def TFL_DensifyOp: TFL_Op<"densify", [ + NoSideEffect, + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + NoQuantizableResult]> { let summary = "Densify operator"; let description = [{ @@ -3814,7 +3931,7 @@ def TFL_NumericVerifyOp : Op:$input, + TFL_TensorOf<[QI8, QUI8, QI16, F16, TFL_Quint8]>:$input, TFL_TensorOf<[F32]>:$ref, // Attributes diff --git a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc index c338b723a4a..51fcbb97360 100644 --- a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc @@ -146,6 +146,10 @@ Status ConvertSavedModelToTFLiteFlatBuffer( saved_model_exported_names.begin(), saved_model_exported_names.end()); absl::Span exported_names(exported_names_in_vector); + if (exported_names.size() != 1) { + return errors::Unimplemented("Only support a single exported name."); + } + TF_ASSIGN_OR_RETURN(auto module, ImportSavedModel(model_flags.saved_model_dir(), model_flags.saved_model_version(), tags, diff --git a/tensorflow/compiler/mlir/lite/quantization/device_target.cc b/tensorflow/compiler/mlir/lite/quantization/device_target.cc index 48c0345ff3d..6b5c894b7f5 100644 --- 
a/tensorflow/compiler/mlir/lite/quantization/device_target.cc +++ b/tensorflow/compiler/mlir/lite/quantization/device_target.cc @@ -32,6 +32,7 @@ namespace mlir { namespace quant { constexpr int k8Bits = 8; +constexpr int k32Bits = 32; constexpr unsigned kSigned = quant::QuantizationFlags::Signed; DeviceTarget::DeviceTarget(MLIRContext* ctx) : ctx_(ctx) { @@ -39,20 +40,20 @@ DeviceTarget::DeviceTarget(MLIRContext* ctx) : ctx_(ctx) { i8_ = IntegerType::get(k8Bits, ctx_); i8_min_ = QuantizedType::getDefaultMinimumForInteger(kSigned, k8Bits); i8_max_ = QuantizedType::getDefaultMaximumForInteger(kSigned, k8Bits); + i32_ = IntegerType::get(k32Bits, ctx_); + i32_min_ = QuantizedType::getDefaultMinimumForInteger(kSigned, k32Bits); + i32_max_ = QuantizedType::getDefaultMaximumForInteger(kSigned, k32Bits); any_ = AnyQuantizedType(); qi8_ = AnyQuantizedType::get(kSigned, i8_, f32_, i8_min_, i8_max_); qi8n_ = AnyQuantizedType::get(kSigned, i8_, f32_, i8_min_ + 1, i8_max_); + qi32_ = AnyQuantizedType::get(kSigned, i32_, f32_, i32_min_, i32_max_); assert(qi8n_ == qi8n_); } -Optional DeviceTarget::GetKernelSpec(QuantizeRegionOp op) const { - auto kernel_specs_it = specs_.find(op.logical_kernel()); +Optional DeviceTarget::GetKernelSpec( + llvm::StringRef kernel, const KernelSpecs::Signature& signature) const { + auto kernel_specs_it = specs_.find(kernel); if (kernel_specs_it == specs_.end()) return llvm::None; - - KernelSpecs::Signature signature; - signature.reserve(op.input_specs().size() + op.output_specs().size()); - AppendToSignature(op.input_specs(), &signature); - AppendToSignature(op.output_specs(), &signature); return kernel_specs_it->getValue().Find(signature); } @@ -62,31 +63,38 @@ ScaleDecomposeFn DeviceTarget::GetDecomposeFn(QuantizeRegionOp op) const { return kernel_specs_it->second.GetDecomposeFn(); } +void DeviceTarget::AppendToSignature(Type spec, + KernelSpecs::Signature* signature) { + if (auto quant = spec.dyn_cast_or_null()) { + signature->push_back(AnyQuantizedType::get( + quant.getFlags(), quant.getStorageType(), quant.getExpressedType(), + quant.getStorageTypeMin(), quant.getStorageTypeMax())); + } else if (auto any = spec.dyn_cast_or_null()) { + signature->push_back(any); + } else { // float + signature->push_back(AnyQuantizedType()); + } +} + LogicalResult DeviceTarget::RegisterKernel( llvm::StringRef kernel, const KernelSpecs::Signature& signature, const ScaleFn& fn, const ScaleDecomposeFn& dfn) { return specs_[kernel].Add(signature, {ScaleConstraintType::CustomScale, fn}); } +namespace ph = std::placeholders; + LogicalResult DeviceTarget::RegisterKernel( llvm::StringRef kernel, const KernelSpecs::Signature& signature, const ScaleConstraintType constraint) { - return specs_[kernel].Add(signature, {constraint, {}}); -} - -void DeviceTarget::AppendToSignature(ArrayAttr specs_attr, - KernelSpecs::Signature* signature) const { - for (auto attr : specs_attr) { - Type spec = attr.cast().getValue(); - if (auto quant = spec.dyn_cast()) { - signature->push_back(AnyQuantizedType::get( - quant.getFlags(), quant.getStorageType(), quant.getExpressedType(), - quant.getStorageTypeMin(), quant.getStorageTypeMax())); - } else if (auto any = spec.dyn_cast()) { - signature->push_back(any); - } else { // float - signature->push_back({}); - } + if (failed(specs_[kernel].Add(signature, {constraint, {}}))) return failure(); + switch (constraint) { + case ScaleConstraintType::OutputInputSameScale: + specs_[kernel].WithImpl(std::bind(&DeviceTarget::DecomposeSameScale, + ph::_1, ph::_2, ph::_3, 
ph::_4)); + return success(); + default: + return failure(); } } @@ -119,7 +127,7 @@ LogicalResult DeviceTarget::DecomposeMultiplyAccumulateScale( input_multipliers->append(3, kUnitQuantizedMultiplier); // output multipliers - double real_multiplier = o_spec.getScale() / scale_product; + double real_multiplier = scale_product / o_spec.getScale(); output_multipliers->push_back(quant::QuantizeMultiplier(real_multiplier)); // output ranges @@ -134,5 +142,40 @@ LogicalResult DeviceTarget::DecomposeMultiplyAccumulateScale( return success(); } +LogicalResult DeviceTarget::DecomposeSameScale( + Operation* op, quant::QuantizedMultipliers* input_multipliers, + quant::QuantizedMultipliers* output_multipliers, + quant::QuantizedRanges* output_ranges) { + auto rop = llvm::dyn_cast(op); + if (!rop) return failure(); + + // input multipliers + for (int i = 0; i < op->getNumOperands(); ++i) { + input_multipliers->push_back(kUnitQuantizedMultiplier); + } + + // output multipliers + for (int i = 0; i < op->getNumResults(); ++i) { + output_multipliers->push_back(kUnitQuantizedMultiplier); + } + + auto o_spec = rop.output_specs()[0] + .cast() + .getValue() + .dyn_cast(); + if (!o_spec) return failure(); + + // output ranges + auto min = rop.getAttrOfType("min"); + auto max = rop.getAttrOfType("max"); + output_ranges->push_back(quant::CalculateQuantizedRange( + o_spec.getScale(), o_spec.getZeroPoint(), + (min ? absl::optional(min.getValueAsDouble()) : absl::nullopt), + (max ? absl::optional(max.getValueAsDouble()) : absl::nullopt), + o_spec.getStorageTypeMin(), o_spec.getStorageTypeMax())); + + return success(); +} + } // namespace quant } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/quantization/device_target.h b/tensorflow/compiler/mlir/lite/quantization/device_target.h index 65e0c5fe4a6..8ed43157df8 100644 --- a/tensorflow/compiler/mlir/lite/quantization/device_target.h +++ b/tensorflow/compiler/mlir/lite/quantization/device_target.h @@ -134,11 +134,18 @@ class DeviceTarget { explicit DeviceTarget(MLIRContext* ctx); // Retrieves the kernel spec for the quant region op. - Optional GetKernelSpec(quant::QuantizeRegionOp op) const; + Optional GetKernelSpec( + llvm::StringRef kernel, const KernelSpecs::Signature& signature) const; // Retrieves the scale decomposition function for the quant region op. ScaleDecomposeFn GetDecomposeFn(quant::QuantizeRegionOp op) const; + // converts specification to signature: + // - UniformedQuantizedType -> AnyQuantizedType + // - AnyQuantizedType (int) -> AnyQuantizedType + // - Float -> {} + static void AppendToSignature(Type spec, KernelSpecs::Signature* signature); + protected: // Adds the kernel spec with the custom scale function for the kernel. LogicalResult RegisterKernel(llvm::StringRef kernel, @@ -154,13 +161,6 @@ class DeviceTarget { // added before. KernelSpecs& RegisterKernel(llvm::StringRef kernel) { return specs_[kernel]; } - // converts specification to signature: - // - UniformedQuantizedType -> AnyQuantizedType - // - AnyQuantizedType (int) -> AnyQuantizedType - // - Float -> {} - void AppendToSignature(ArrayAttr specs_attr, - KernelSpecs::Signature* signature) const; - // For "mulmat->add" type of kernels, convert the scales of all the ports to // multipliers. static LogicalResult DecomposeMultiplyAccumulateScale( @@ -168,11 +168,17 @@ class DeviceTarget { quant::QuantizedMultipliers* output_multipliers, quant::QuantizedRanges* output_ranges); + // For "reshape" type of kernels. 
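The decompose functions above reduce each port to either a unit multiplier or a real multiplier derived from the scales (the corrected real_multiplier is the product of the input scales divided by the output scale), which quant::QuantizeMultiplier then turns into an integer multiplier plus a shift. The sketch below shows the usual frexp-based decomposition behind that step; the exact rounding and sign handling in the real helper may differ, so treat it as an illustration only.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <utility>

// Splits a positive real multiplier into a Q31 fixed-point multiplier and a
// power-of-two exponent, so real ~= (q_fixed / 2^31) * 2^shift.
std::pair<int32_t, int> DecomposeMultiplier(double real_multiplier) {
  if (real_multiplier == 0.0) return {0, 0};
  int shift = 0;
  const double q = std::frexp(real_multiplier, &shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to 1.0
    q_fixed /= 2;
    ++shift;
  }
  return {static_cast<int32_t>(q_fixed), shift};
}

int main() {
  // e.g. input_scale * filter_scale / output_scale for a multiply-accumulate kernel.
  const double real_multiplier = (0.02 * 0.01) / 0.05;
  const auto decomposed = DecomposeMultiplier(real_multiplier);
  std::printf("multiplier=%d shift=%d\n", static_cast<int>(decomposed.first),
              decomposed.second);
  return 0;
}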
+ static LogicalResult DecomposeSameScale( + Operation* op, quant::QuantizedMultipliers* input_multipliers, + quant::QuantizedMultipliers* output_multipliers, + quant::QuantizedRanges* output_ranges); + // A set of parameters are required to build the signatures. FloatType f32_; - IntegerType i8_; - int64_t i8_min_, i8_max_; - AnyQuantizedType any_, qi8_, qi8n_; + IntegerType i8_, i32_; + int64_t i8_min_, i8_max_, i32_min_, i32_max_; + AnyQuantizedType any_, qi8_, qi8n_, qi32_; private: // Maps the kernel names to all the available kernels. diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/BUILD b/tensorflow/compiler/mlir/lite/quantization/lite/BUILD index 1504f7d3a1b..b4fddceb580 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/quantization/lite/BUILD @@ -72,5 +72,6 @@ tf_cc_binary( "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/strings", "@llvm-project//llvm:support", + "@llvm-project//mlir:AllPassesAndDialects", ], ) diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc index 9b49757fd3f..a2e3c065113 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc +++ b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/lite/schema/schema_generated.h" namespace mlir { namespace lite { @@ -38,7 +39,9 @@ namespace lite { TfLiteStatus QuantizeModel( const tflite::ModelT& input_model, const tflite::TensorType& input_type, const tflite::TensorType& output_type, - const std::unordered_set& operator_names, bool fully_quantize, + const tflite::TensorType& inference_type, + const std::unordered_set& operator_names, + bool disable_per_channel, bool fully_quantize, flatbuffers::FlatBufferBuilder* builder, tflite::ErrorReporter* error_reporter) { // TODO(b/142502494): remove this restriction by improving the `emit_adaptor` @@ -72,15 +75,18 @@ TfLiteStatus QuantizeModel( // Apply quantization passes PassManager pm(module->getContext()); TFL::QuantizationSpecs quant_specs; - quant_specs.inference_type = tensorflow::DT_QINT8; + quant_specs.inference_type = tflite::TflTypeToTfType(inference_type); quant_specs.post_training_quantization = true; + quant_specs.disable_per_channel = disable_per_channel; bool emit_adaptor = false; auto input_tf_type = tflite::TflTypeToTfType(input_type); if (input_tf_type == tensorflow::DT_FLOAT) { emit_adaptor = true; - } else if (input_tf_type == tensorflow::DT_UINT8) { - quant_specs.inference_type = tensorflow::DT_QUINT8; + } else if (input_tf_type == tensorflow::DT_UINT8 || + input_tf_type == tensorflow::DT_INT8 || + input_tf_type == tensorflow::DT_INT16) { + quant_specs.inference_type = input_tf_type; } pm.addPass(TFL::CreatePrepareQuantizePass(quant_specs)); diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h index 473e97e07df..d60df56b473 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h +++ b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_model.h @@ -26,12 +26,15 @@ namespace mlir { namespace lite { // Quantize the `input_model` and write the result to a flatbuffer `builder`. 
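The new i32_/qi32_ signature members above are built from the same default integer bounds as the existing 8-bit ones, via QuantizedType::getDefaultMinimumForInteger / getDefaultMaximumForInteger. As a quick reference, the snippet below reproduces the signed two's-complement bounds those defaults correspond to; it is plain arithmetic for illustration, not the MLIR helper.

#include <cstdint>
#include <cstdio>

// Two's-complement bounds for a signed storage type of `bits` width, matching
// the values used to build the qi8/qi32 signature types.
constexpr int64_t SignedMin(unsigned bits) { return -(int64_t{1} << (bits - 1)); }
constexpr int64_t SignedMax(unsigned bits) { return (int64_t{1} << (bits - 1)) - 1; }

int main() {
  std::printf("8-bit:  [%lld, %lld]\n", static_cast<long long>(SignedMin(8)),
              static_cast<long long>(SignedMax(8)));
  std::printf("32-bit: [%lld, %lld]\n", static_cast<long long>(SignedMin(32)),
              static_cast<long long>(SignedMax(32)));
  return 0;
}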
-// The `input_type` and `output_type` can be float32/qint8/int8. +// The `input_type`, `output_type` and `inference_type` can be +// float32/qint8/int8/int16. // Return partially quantized model if `fully_quantize` is false. TfLiteStatus QuantizeModel( const tflite::ModelT& input_model, const tflite::TensorType& input_type, const tflite::TensorType& output_type, - const std::unordered_set& operator_names, bool fully_quantize, + const tflite::TensorType& inference_type, + const std::unordered_set& operator_names, + bool disable_per_channel, bool fully_quantize, flatbuffers::FlatBufferBuilder* builder, tflite::ErrorReporter* error_reporter); } // namespace lite diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/tfl_quantizer.cc b/tensorflow/compiler/mlir/lite/quantization/lite/tfl_quantizer.cc index 7530cdf008f..5bd1b71e631 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/tfl_quantizer.cc +++ b/tensorflow/compiler/mlir/lite/quantization/lite/tfl_quantizer.cc @@ -46,7 +46,9 @@ TfLiteStatus QuantizeAnnotatedModel(llvm::StringRef buffer, tflite::StderrReporter error_reporter; return mlir::lite::QuantizeModel( - *model, tflite::TensorType_INT8, tflite::TensorType_INT8, {}, + *model, tflite::TensorType_INT8, tflite::TensorType_INT8, + tflite::TensorType_INT8, {}, + /*disable_per_channel=*/false, /*fully_quantize=*/true, builder, &error_reporter); } diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_config.h b/tensorflow/compiler/mlir/lite/quantization/quantization_config.h index 5b1c73e7887..2ffba579548 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_config.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_config.h @@ -46,6 +46,12 @@ struct QuantizationSpecs { // post-training quantization. We need to deprecate the `weight_quantization`. bool post_training_quantization = false; + // When set to true, quantization will be done per-tensor. Currently, this + // option is only valid when the quantization parameters need to be created by + // scanning the constant content (post-training quantization or QAT without + // weight FakeQuant). + bool disable_per_channel = false; + // The node type when the model is exported. Currently this is limited to // DT_FLOAT, DT_HALF, DT_QINT8, and DT_QUINT8. When DT_HALF is used, the // `weight_quantization` flag needs to set to true. When DT_QUINT8 is used, @@ -84,7 +90,7 @@ struct QuantizationSpecs { bool RunWeightQuantization() const { return weight_quantization; } // Whether this inference type represents a signed storage type. - bool IsSignedInferenceType() { + bool IsSignedInferenceType() const { switch (inference_type) { case tensorflow::DT_QUINT8: case tensorflow::DT_QUINT16: @@ -96,7 +102,7 @@ struct QuantizationSpecs { // Gets the width of this quantization type. Returns 0 if it isn't a // quantization type. 
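The flags being threaded through here (inference type, signedness, per-channel) ultimately just select the affine mapping between real values and stored integers. For readers following the quantization changes, a self-contained reminder of that mapping follows; the function names are illustrative, not part of the TensorFlow code.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Affine quantization: q = clamp(round(x / scale) + zero_point, qmin, qmax);
// dequantization is the inverse, x' = scale * (q - zero_point).
int Quantize(float x, float scale, int zero_point, int qmin, int qmax) {
  const int q = static_cast<int>(std::lround(x / scale)) + zero_point;
  return std::min(std::max(q, qmin), qmax);
}

float Dequantize(int q, float scale, int zero_point) {
  return scale * static_cast<float>(q - zero_point);
}

int main() {
  // Signed 8-bit storage (DT_QINT8-style): range [-128, 127].
  const float scale = 0.05f;
  const int zero_point = 0;
  const int q = Quantize(1.37f, scale, zero_point, -128, 127);
  std::printf("q=%d dequantized=%f\n", q, Dequantize(q, scale, zero_point));
  return 0;
}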
- int64_t GetQuantizationTypeWidth() { + int64_t GetQuantizationTypeWidth() const { switch (inference_type) { case tensorflow::DT_QINT8: case tensorflow::DT_QUINT8: diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_context.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_context.cc index 2b2c44f03a4..bcfd06cf06c 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_context.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_context.cc @@ -64,10 +64,23 @@ std::vector QuantizeContext::GetAllOps() { return all_ops; } +KernelSpecs::Signature QuantizeContext::GetSignature(QuantizeRegionOp op) { + KernelSpecs::Signature signature; + signature.reserve(op.input_specs().size() + op.output_specs().size()); + for (int i = 0; i < op.getNumOperands(); ++i) { + DeviceTarget::AppendToSignature(GetOperandParams(op, i), &signature); + } + for (int i = 0; i < op.getNumResults(); ++i) { + DeviceTarget::AppendToSignature(GetResultParams(op, i), &signature); + } + return signature; +} + LogicalResult QuantizeContext::Handle( quant::QuantizeRegionOp op, llvm::SmallVectorImpl *new_items, bool *changed) { - auto spec = target_spec_.GetKernelSpec(op); + auto signature = GetSignature(op); + auto spec = target_spec_.GetKernelSpec(op.logical_kernel(), signature); if (!spec.hasValue()) { op.emitWarning( "Couldn't find kernel from the registeration for quantization."); diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_context.h b/tensorflow/compiler/mlir/lite/quantization/quantization_context.h index 0d460fd9a50..0c5137eb1a2 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_context.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_context.h @@ -107,6 +107,9 @@ class QuantizeContext { return states_manager_.GetOperandParams(op, index); } + // Return the signature of the op. + KernelSpecs::Signature GetSignature(QuantizeRegionOp op); + // A heuristic to get quantization parameters satisfies the same scale // constraints: // - If there are immutable states, diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 25ee1d8ba5d..15b6bf56b7a 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1213,15 +1213,14 @@ func @resize_nearest_neighbor(%arg0: tensor<1x100x100x3xf32>, %arg1: tensor<4xi3 %0 = "tf.ResizeNearestNeighbor"(%arg0, %arg1) {align_corners = true} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor return %0 : tensor // CHECK-LABEL: resize_nearest_neighbor - // CHECK: "tfl.resize_nearest_neighbor"(%arg0, %arg1) {align_corners = true} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor + // CHECK: "tfl.resize_nearest_neighbor"(%arg0, %arg1) {align_corners = true, half_pixel_centers = false} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor } -// Note: half_pixel_centers isn't supported by TFLite, so it's not legalized. 
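With half_pixel_centers now carried through to tfl.resize_nearest_neighbor in the legalization tests here, the attribute only changes how an output index is mapped back to a source index. The sketch below mirrors the common TensorFlow formulation of that mapping; the exact rounding in the TFLite kernel may differ slightly, so it is illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Maps an output index to a source index for nearest-neighbor resizing.
// half_pixel_centers samples at pixel centers ((x + 0.5) * scale) instead of
// pixel corners (x * scale); align_corners changes both the scale and the
// rounding mode.
int SourceIndex(int x, int in_size, int out_size, bool align_corners,
                bool half_pixel_centers) {
  const float scale = (align_corners && out_size > 1)
                          ? (in_size - 1) / static_cast<float>(out_size - 1)
                          : in_size / static_cast<float>(out_size);
  const float in = half_pixel_centers ? (x + 0.5f) * scale : x * scale;
  const int idx = align_corners ? static_cast<int>(std::round(in))
                                : static_cast<int>(std::floor(in));
  return std::max(0, std::min(idx, in_size - 1));
}

int main() {
  for (int x = 0; x < 4; ++x) {
    std::printf("out %d -> src %d (half-pixel) vs %d (default)\n", x,
                SourceIndex(x, 8, 4, false, true),
                SourceIndex(x, 8, 4, false, false));
  }
  return 0;
}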
func @resize_nearest_neighbor_with_half_pixel_centers(%arg0: tensor<1x100x100x3xf32>, %arg1: tensor<4xi32>) -> tensor { - %0 = "tf.ResizeNearestNeighbor"(%arg0, %arg1) {align_corners = true, half_pixel_centers = true} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor + %0 = "tf.ResizeNearestNeighbor"(%arg0, %arg1) {align_corners = false, half_pixel_centers = true} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor return %0 : tensor // CHECK-LABEL: resize_nearest_neighbor_with_half_pixel_centers - // CHECK: "tf.ResizeNearestNeighbor"(%arg0, %arg1) {align_corners = true, half_pixel_centers = true} + // CHECK: "tfl.resize_nearest_neighbor"(%arg0, %arg1) {align_corners = false, half_pixel_centers = true} : (tensor<1x100x100x3xf32>, tensor<4xi32>) -> tensor } func @sparse_to_dense_with_scalar_sparse_indices(%arg0: tensor, %arg1: tensor<3xi32>, %arg2: tensor, %arg3: tensor) -> tensor { @@ -1497,3 +1496,27 @@ func @broadcast_to_i32(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3 // CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xi32>, tensor<3x3xi32>) -> tensor<3x3xi32> // CHECK: return [[MUL]] : tensor<3x3xi32> } + +func @matmul_batch(%arg0: tensor<10x15xf32>, %arg1: tensor<15x17xf32>) -> tensor<10x17xf32> { + %0 = "tf.BatchMatMul"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", device = "/device:CPU:0", name = "MatMul", adj_x = false, adj_y = false} : +(tensor<10x15xf32>, tensor<15x17xf32>) -> tensor<10x17xf32> + return %0 : tensor<10x17xf32> +// CHECK-LABEL: matmul_batch +// CHECK: "tfl.batch_matmul"(%arg0, %arg1) {adj_x = false, adj_y = false} : (tensor<10x15xf32>, tensor<15x17xf32>) -> tensor<10x17xf32> +} + +func @matmul_batchv2(%arg0: tensor<2x10x15xf32>, %arg1: tensor<15x17xf32>) -> tensor<2x10x17xf32> { + %0 = "tf.BatchMatMulV2"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", device = "/device:CPU:0", name = "MatMul", adj_x = false, adj_y = false} : +(tensor<2x10x15xf32>, tensor<15x17xf32>) -> tensor<2x10x17xf32> + return %0 : tensor<2x10x17xf32> +// CHECK-LABEL: matmul_batchv2 +// CHECK: "tfl.batch_matmul"(%arg0, %arg1) {adj_x = false, adj_y = false} : (tensor<2x10x15xf32>, tensor<15x17xf32>) -> tensor<2x10x17xf32> +} + +func @matmul_batchv2_unknown_dim(%arg0: tensor, %arg1: tensor<15x17xf32>) -> tensor { + %0 = "tf.BatchMatMulV2"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", device = "/device:CPU:0", name = "MatMul", adj_x = false, adj_y = false} : +(tensor, tensor<15x17xf32>) -> tensor + return %0 : tensor +// CHECK-LABEL: matmul_batchv2_unknown_dim +// CHECK: "tfl.batch_matmul"(%arg0, %arg1) {adj_x = false, adj_y = false} : (tensor, tensor<15x17xf32>) -> tensor +} diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 38f736ee378..f42e06350e5 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -192,7 +192,7 @@ func @testSquare(tensor) -> tensor { func @testQuantizedResizeNearestNeighbor(tensor>, tensor) -> tensor> { ^bb0(%arg0: tensor>, %arg1: tensor): - %0 = "tfl.resize_nearest_neighbor"(%arg0, %arg1) { align_corners = false } : (tensor>, tensor) -> tensor> + %0 = "tfl.resize_nearest_neighbor"(%arg0, %arg1) { align_corners = false, half_pixel_centers = false } : (tensor>, tensor) -> tensor> return %0 : tensor> } @@ -573,7 +573,7 @@ func @testLogistic(tensor<1x2x3x4x5xf32>) -> tensor<1x2x3x4x5xf32> { // test invalid Logistic input func @testLogisticWithWrongInputType(tensor) -> tensor { ^bb0(%arg0: tensor): - // expected-error @+1 
{{tfl.logistic' op operand #0 must be tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or QUI16 type values}} + // expected-error @+1 {{'tfl.logistic' op operand #0 must be tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or TFLite quint8 type values, but got 'tensor'}} %0 = "tfl.logistic"(%arg0): (tensor) -> tensor return %0#0 : tensor } @@ -1252,10 +1252,10 @@ func @testOneHot(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, % // ----- -func @testOneHotWithInvalidOutputType(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor<*xi8> { - // expected-error @+1 {{'tfl.one_hot' op result #0 must be tensor of 32-bit float or 32-bit signless integer or 64-bit signless integer or 1-bit signless integer values}} - %0 = "tfl.one_hot"(%arg0, %arg1, %arg2, %arg3) {axis = -1 : i32} : (tensor<3xi32>, tensor, tensor, tensor) -> tensor<*xi8> - return %0 : tensor<*xi8> +func @testOneHotWithInvalidOutputType(%arg0: tensor<3xi32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor<*xi16> { + // expected-error @+1 {{'tfl.one_hot' op result #0 must be tensor of 32-bit float or 32-bit signless integer or 64-bit signless integer or 1-bit signless integer or 8-bit signless integer or 8-bit unsigned integer values, but got 'tensor<*xi16>'}} + %0 = "tfl.one_hot"(%arg0, %arg1, %arg2, %arg3) {axis = -1 : i32} : (tensor<3xi32>, tensor, tensor, tensor) -> tensor<*xi16> + return %0 : tensor<*xi16> } // ----- @@ -1489,7 +1489,8 @@ func @testEmbeddingLookupValueAndResultElementTypeTraitFailed(%arg0 : tensor>) -> tensor<1x56x56x192x!quant.uniform> { +func @testWrongQuantizedLocalResponseNormalization(%arg0 : tensor<1x56x56x192x!quant.uniform>) -> tensor<1x56x56x192x!quant.uniform> { + // expected-error @+1 {{'tfl.local_response_normalization' op operand #0 must be tensor of 32-bit float values, but got 'tensor<1x56x56x192x!quant.uniform>'}} %0 = "tfl.local_response_normalization"(%arg0) {alpha = 9.99999974E-5 : f32, beta = 5.000000e-01 : f32, bias = 2.000000e+00 : f32, radius = 5 : i32} : (tensor<1x56x56x192x!quant.uniform>) -> tensor<1x56x56x192x!quant.uniform> return %0 : tensor<1x56x56x192x!quant.uniform> } @@ -1523,32 +1524,32 @@ func @testDepthToSpaceInvalidOutputType(%arg0: tensor<1x1x1x4xf32>) -> tensor<1x // ----- -func @testPReluWrongOutputRank(%arg0: tensor<10x10x10x10xf32>, %arg1: tensor<1x1x10xf32>) -> tensor<10x10x10xf32> { - // expected-error @+1 {{'input' and 'output' should have the same rank}} - %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<10x10x10x10xf32>, tensor<1x1x10xf32>) -> tensor<10x10x10xf32> - return %0 : tensor<10x10x10xf32> +func @testPReluWrongOutputRank(%arg0: tensor<10x10x10x10xf32>, %arg1: tensor<10x10x10x10xf32>) -> tensor<10x10xf32> { + // expected-error @+1 {{'tfl.prelu' op result type '10x10' not broadcast compatible with broadcasted operands's shapes '10x10x10x10'}} + %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<10x10x10x10xf32>, tensor<10x10x10x10xf32>) -> tensor<10x10xf32> + return %0 : tensor<10x10xf32> } // ----- func @testPReluWrongOutputShape(%arg0: tensor<1x2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<1x2x3x5xf32> { - // expected-error @+1 {{'input' and 'output' should have the same shape}} + // expected-error @+1 {{'tfl.prelu' op result type '1x2x3x5' not broadcast compatible with broadcasted operands's shapes '1x2x3x4'}} %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<1x2x3x4xf32>, tensor<2x3x4xf32>) -> tensor<1x2x3x5xf32> return %0 : tensor<1x2x3x5xf32> } // ----- -func @testPReluWrongAlphaRank(%arg0: tensor<7x3x2x14xf32>, %arg1: 
tensor<2x7x3x2x14xf32>) -> tensor<7x3x2x14xf32> { +func @testPReluWrongAlphaRank(%arg0: tensor<7x3x2x14xf32>, %arg1: tensor<7x3x2x14xf32>) -> tensor<7x3x2x14xf32> { // expected-error @+1 {{'alpha' should have one less rank than 'input'.}} - %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<7x3x2x14xf32>, tensor<2x7x3x2x14xf32>) -> tensor<7x3x2x14xf32> + %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<7x3x2x14xf32>, tensor<7x3x2x14xf32>) -> tensor<7x3x2x14xf32> return %0 : tensor<7x3x2x14xf32> } // ----- func @testPReluInvalidBroadcast(%arg0: tensor<15x14x2x14xf32>, %arg1: tensor<1x1x3xf32>) -> tensor<15x14x2x14xf32> { - // expected-error @+1 {{'alpha' is not broadcastable at dimension 2.}} + // expected-error @+1 {{'tfl.prelu' op operands don't have broadcast-compatible shapes}} %0 = "tfl.prelu"(%arg0, %arg1) : (tensor<15x14x2x14xf32>, tensor<1x1x3xf32>) -> tensor<15x14x2x14xf32> return %0 : tensor<15x14x2x14xf32> } diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index d1c0dd20c05..2815afd14b9 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -958,3 +958,16 @@ func @FusingdivRelu(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> // Fusing: %[[div2:[0-9].*]] = tfl.div %[[relu]], %[[div1]] {fused_activation_function = "RELU6"} : tensor<1xf32> // Fusing: return } + +func @ReorderAddWithConstant(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<1.0> : tensor<2x2xf32> + %cst_1 = constant dense<2.0> : tensor<2x2xf32> + %0 = "tfl.add"(%arg0, %cst) {fused_activation_function = "NONE"} : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tfl.add"(%0, %cst_1) {fused_activation_function = "NONE"} : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: ReorderAddWithConstant + // CHECK: %[[CONST:.*]] = constant dense<3.000000e+00> : tensor<2x2xf32> + // CHECK: %[[RESULT:.*]] = tfl.add %arg0, %[[CONST]] {fused_activation_function = "NONE"} : tensor<2x2xf32> +} + diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 5eefa821c6b..d3f1a430642 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -48,7 +48,8 @@ void AddQuantizationPasses(const mlir::TFL::QuantizationSpecs& quant_specs, quant_specs.default_ranges.second.hasValue()) { pass_manager->addPass(mlir::TFL::CreateDefaultQuantParamsPass( quant_specs.default_ranges.first.getValueOr(0.0), - quant_specs.default_ranges.second.getValueOr(0.0))); + quant_specs.default_ranges.second.getValueOr(0.0), + quant_specs.IsSignedInferenceType())); pass_manager->addPass(mlir::TFL::CreateQuantizePass()); pass_manager->addPass( mlir::TFL::CreatePostQuantizePass(emit_quant_adaptor_ops)); @@ -73,16 +74,17 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, pass_manager->addPass(mlir::TFControlFlow::CreateRaiseTFControlFlowPass()); } + if (pass_config.shape_inference) { + pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); + } + // Keep this pass after the shape inference pass, which couldn't do shape + // inference for non-tf ops. 
if (!pass_config.quant_specs.serialized_quant_stats.empty()) { pass_manager->addPass( mlir::quant::CreateImportQuantStatsPassForTFControlDialect( pass_config.quant_specs.serialized_quant_stats)); } - if (pass_config.shape_inference) { - pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); - } - // The conversion pipeline has to follow the following orders: // 1) Saved model related optimization like decompose resource ops // 2) Convert composite functions like lstm/rnns, along with proper function diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index 4bc9d9e0c2d..fce1333a491 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -160,6 +160,11 @@ int main(int argc, char **argv) { absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); absl::Span exported_names(exported_names_vector); + if (exported_names.size() != 1) { + llvm::errs() << "There should be only one exported name"; + return kTrFailure; + } + module = tensorflow::ImportSavedModel(input_file_name, saved_model_version, tags, exported_names, &context); } else { diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index b9ec67736d9..62f64ab63b4 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -174,7 +174,7 @@ StatusOr ImportSavedModel( return module; } else if (saved_model_version == 1) { auto module = tensorflow::SavedModelSignatureDefsToMlirImport( - input_filename, tags, context); + input_filename, tags, exported_names, context); if (!module) return tensorflow::errors::InvalidArgument("fail to open input file"); diff --git a/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc b/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc index a1602baced5..c23ae9fcfab 100644 --- a/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc +++ b/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc @@ -46,8 +46,11 @@ namespace { class DefaultQuantParamsPass : public PassWrapper { public: - explicit DefaultQuantParamsPass(double default_min, double default_max) - : default_min_(default_min), default_max_(default_max) {} + explicit DefaultQuantParamsPass(double default_min, double default_max, + bool is_signed) + : default_min_(default_min), + default_max_(default_max), + is_signed_(is_signed) {} void runOnFunction() override; @@ -82,6 +85,7 @@ class DefaultQuantParamsPass double default_min_; double default_max_; + bool is_signed_; quant::QuantParams default_quant_params_; }; } // namespace @@ -214,15 +218,16 @@ quant::QuantParams DefaultQuantParamsPass::GetDefaultQuantParams( default_quant_params_ = quant::fakeQuantAttrsToType( builder.getUnknownLoc(), /*numBits=*/8, default_min_, default_max_, /*narrowRange=*/false, - builder.getF32Type()); + builder.getF32Type(), is_signed_); } return default_quant_params_; } // Creates an instance of the default quant parameters pass. 
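The default quant params pass above turns a (default_min, default_max) range into a quantized type via quant::fakeQuantAttrsToType, now with the storage signedness threaded through. The sketch below shows one common way such a range is converted into a scale and a nudged zero point for 8-bit storage; it is a simplified illustration of that conversion, not the MLIR helper itself, and it ignores narrow-range handling.

#include <algorithm>
#include <cmath>
#include <cstdio>

struct QuantParams {
  float scale;
  int zero_point;
};

// Derives scale/zero_point from a real [rmin, rmax] range for 8-bit storage.
// Simplified: assumes rmin <= 0 <= rmax.
QuantParams ParamsFromMinMax(float rmin, float rmax, bool is_signed) {
  const int qmin = is_signed ? -128 : 0;
  const int qmax = is_signed ? 127 : 255;
  QuantParams p;
  p.scale = (rmax - rmin) / static_cast<float>(qmax - qmin);
  // Nudge the zero point to an integer inside [qmin, qmax] so that the real
  // value 0.0 is exactly representable.
  const float zp_real = qmin - rmin / p.scale;
  p.zero_point = std::min(
      qmax, std::max(qmin, static_cast<int>(std::lround(zp_real))));
  return p;
}

int main() {
  const QuantParams p = ParamsFromMinMax(-1.0f, 1.0f, /*is_signed=*/false);
  std::printf("scale=%f zero_point=%d\n", p.scale, p.zero_point);
  return 0;
}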
std::unique_ptr> CreateDefaultQuantParamsPass( - double default_min, double default_max) { - return absl::make_unique(default_min, default_max); + double default_min, double default_max, bool is_signed) { + return absl::make_unique(default_min, default_max, + is_signed); } // Registers this pass with default values, only for test @@ -230,7 +235,8 @@ static PassRegistration pass( "tfl-default-quant", "Apply quantization with default quantization parameter", [] { return CreateDefaultQuantParamsPass(/*default_min=*/-1.0, - /*default_max=*/1.0); + /*default_max=*/1.0, + /*is_signed=*/false); }); } // namespace TFL diff --git a/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc b/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc index 201a0bb2481..9b526f40277 100644 --- a/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc +++ b/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc @@ -321,7 +321,8 @@ void DenseToSparse::runOnFunction() { if (result.needs_densify) { const auto value = op->getOperand(operand); - auto densify = builder.create(op->getLoc(), value); + auto densify = + builder.create(op->getLoc(), value.getType(), value); value.replaceAllUsesWith(densify); densify.setOperand(value); } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 13ae216dc25..4c6a16c2233 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -211,6 +211,11 @@ def : Pat<(TF_LogicalOrOp $l, $r), (TFL_LogicalOrOp $l, $r)>; def : Pat<(TF_AddOp $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; def : Pat<(TF_AddV2Op $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; +// When batch size is known, TF BatchMatMul gets unfolded to TFL FullyConnected +// with additional ops. In the case of unknown batch size, the match will +// fall through to here and convert to TF Lite BatchMatMul. 
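The fallthrough patterns below map tf.BatchMatMul/BatchMatMulV2 directly onto tfl.batch_matmul when the batch size is unknown. For reference, the sketch that follows spells out the computation those ops describe, with a 2-D right-hand side broadcast across the batch dimension, matching the 2x10x15 * 15x17 -> 2x10x17 shape from the matmul_batchv2 test earlier in this change. It is an illustration of the convention, not the TFLite kernel.

#include <cstdio>
#include <vector>

// Batched matmul with a broadcast RHS: LHS is [batch, m, k], RHS is [k, n],
// and the RHS is reused for every batch element.
std::vector<float> BatchMatMul(const std::vector<float>& lhs,
                               const std::vector<float>& rhs,
                               int batch, int m, int k, int n) {
  std::vector<float> out(static_cast<size_t>(batch) * m * n, 0.0f);
  for (int b = 0; b < batch; ++b) {
    for (int i = 0; i < m; ++i) {
      for (int j = 0; j < n; ++j) {
        float acc = 0.0f;
        for (int p = 0; p < k; ++p) {
          acc += lhs[(b * m + i) * k + p] * rhs[p * n + j];
        }
        out[(b * m + i) * n + j] = acc;
      }
    }
  }
  return out;
}

int main() {
  const int batch = 2, m = 10, k = 15, n = 17;
  std::vector<float> lhs(batch * m * k, 1.0f), rhs(k * n, 1.0f);
  const auto out = BatchMatMul(lhs, rhs, batch, m, k, n);
  std::printf("out[0] = %f (expect %d)\n", out[0], k);
  return 0;
}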
+def : Pat<(TF_BatchMatMulV2Op $lhs, $rhs, $adj_x, $adj_y), (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; +def : Pat<(TF_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y), (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; def : Pat<(TF_SubOp $lhs, $rhs), (TFL_SubOp $lhs, $rhs, TFL_AF_None)>; def : Pat<(TF_MulOp $lhs, $rhs), (TFL_MulOp $lhs, $rhs, TFL_AF_None)>; def : Pat<(TF_RealDivOp $lhs, $rhs), (TFL_DivOp $lhs, $rhs, TFL_AF_None)>; @@ -297,7 +302,7 @@ def : Pat<(TF_DepthToSpaceOp $input, $block_size, IsDataFormatNHWC:$data_format) (TFL_DepthToSpaceOp $input, (convertIntAttrTo32Bit $block_size))>; def : Pat<(TF_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers), (TFL_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers)>; -def : Pat<(TF_ResizeNearestNeighborOp $images, $size, $align_corners, ConstBoolAttrFalse:$half_pixel_centers), (TFL_ResizeNearestNeighborOp $images, $size, $align_corners)>; +def : Pat<(TF_ResizeNearestNeighborOp $images, $size, $align_corners, $half_pixel_centers), (TFL_ResizeNearestNeighborOp $images, $size, $align_corners, $half_pixel_centers)>; def : Pat<(TF_MirrorPadOp $arg0, $arg1, $cst), (TFL_MirrorPadOp $arg0, $arg1, $cst)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index ce0b49fbd49..49be29065fe 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -75,8 +75,6 @@ class TensorListPatternRewriter : public PatternRewriter { public: explicit TensorListPatternRewriter(FuncOp fn) : PatternRewriter(fn.getContext()) {} - - Operation *insert(Operation *op) override { return OpBuilder::insert(op); } }; /// Lower TensorList ops in functions for subsequent legalization. @@ -861,6 +859,7 @@ LogicalResult LowerStaticTensorListPass::RewriteFunction( target.addLegalOp(); target.addLegalOp(); target.addLegalOp(); + target.addLegalOp(); // Register fused LSTM/RNN ops as legal. target.addLegalOp(); target.addLegalOp(); diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 82d9a76fab3..a3244f31053 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -457,3 +457,21 @@ def : Pat<(TFL_AddOp // The constant folding in this pass might produce constant in the tf dialect. // This rule is to legalize these constant to the tfl dialect. def : Pat<(TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; + +// Reorders adds to allow constant folding. 
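The reordering spelled out in the diagram and pattern that follow relies on the associativity of addition: rewriting (input + A) + B as input + (A + B) lets the two constants fold into a single one at compile time, which is what the ReorderAddWithConstant test above checks (dense<1.0> and dense<2.0> becoming a single dense<3.0>). A small numeric sketch, assuming both adds carry no fused activation and the inner result has a single use:

#include <cstdio>

// (x + a) + b equals x + (a + b) up to floating-point rounding, so the two
// constant operands can be folded into one, removing an add at compile time.
int main() {
  const float a = 1.0f, b = 2.0f;  // the two constant operands
  const float folded = a + b;      // 3.0, matching the CHECK line in the test
  const float x = 0.5f;            // stand-in for the runtime input
  std::printf("((x + a) + b) = %f, (x + folded) = %f\n", (x + a) + b, x + folded);
  return 0;
}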
+// Add --> Add $input, $constantA +// \--> $constantB +// To +// Add --> $input +// \--> Add ($constantA, $constantB) +foreach ActFun = [TFL_AF_Relu, TFL_AF_Relu6, TFL_AF_Relu1, TFL_AF_None] in { + def : Pat<(TFL_AddOp + (TFL_AddOp:$first_output $input, (ConstantOp $a), TFL_AF_None), + (ConstantOp $b), ActFun), + (TFL_AddOp $input, + (TFL_AddOp (ConstantOp $a), (ConstantOp $b), TFL_AF_None), + ActFun), + [(HasOneUse $first_output)]>; +} + + diff --git a/tensorflow/compiler/mlir/lite/transforms/passes.h b/tensorflow/compiler/mlir/lite/transforms/passes.h index 959c17e317a..105c9394fb4 100644 --- a/tensorflow/compiler/mlir/lite/transforms/passes.h +++ b/tensorflow/compiler/mlir/lite/transforms/passes.h @@ -76,7 +76,7 @@ std::unique_ptr> CreateOptimizeFunctionalOpsPass(); // Creates an instance of the TensorFlow Lite dialect pass to add default // quantization parameters. std::unique_ptr> CreateDefaultQuantParamsPass( - double default_min, double default_max); + double default_min, double default_max, bool is_signed); // Creates an instance of the TensorFlow Lite dialect pass to convert dense // tensor to sparse format. diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc index 4f25e434fac..a9e10a485bf 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc @@ -273,8 +273,9 @@ void PrepareQuantizePass::runOnFunction() { // Finally, the quantization parameters can be propagated to the rest of the // values (tensors). - ApplyQuantizationParamsPropagation(func, is_signed, disable_per_channel, - GetOpQuantSpec); + ApplyQuantizationParamsPropagation( + func, is_signed, disable_per_channel || quant_specs_.disable_per_channel, + GetOpQuantSpec); ConvertMlirQuantOpsToTFLQuantOps(func); } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index a97af8e632e..c5211bdfadb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -48,6 +48,7 @@ limitations under the License. #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/dilated_conv.h" @@ -612,11 +613,35 @@ struct ConvertTFStridedSlice : public RewritePattern { #include "tensorflow/compiler/mlir/lite/transforms/generated_prepare_tf.inc" +// Returns success if all the operations in the `op`'s regions including `op` +// itself are legal in a TFLite pipeline. +LogicalResult ValidateOp(Operation *op) { + bool has_illegal_ops = false; + op->walk([&](Operation *op) { + if (isa(op)) { + has_illegal_ops = true; + op->emitOpError() << "is illegal in a TFLite pipeline"; + } + }); + + return failure(has_illegal_ops); +} + void PrepareTFPass::runOnFunction() { OwningRewritePatternList patterns; auto func = getFunction(); MLIRContext *ctx = &getContext(); + // Check illegal ops in a TFLite pipeline (e.g. trainning only ops) , since + // PrepareTFPass is the very first TFLite pass in the pipeline. 
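A small NumPy sketch (illustrative only) of why the TFL_AddOp reordering pattern added to optimize_patterns.td above enables constant folding: after reassociation the two constant operands collapse into a single constant at compile time, leaving one add at runtime.

import numpy as np

x = np.random.rand(2, 3).astype(np.float32)   # runtime input
const_a = np.float32(0.5)                     # first constant operand
const_b = np.float32(1.5)                     # second constant operand

# Original form: Add(Add(x, const_a), const_b) -> two adds at runtime.
original = (x + const_a) + const_b

# Rewritten form: Add(x, Add(const_a, const_b)); the inner add involves
# only constants and folds to 2.0, so a single add remains at runtime.
rewritten = x + (const_a + const_b)

assert np.allclose(original, rewritten)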
+ // TODO(jingpu): It might be better to split this check into its own pass + // to make things more modular. + if (failed(ValidateOp(func))) { + func.emitError() << "tfl-prepare-tf pass failed."; + signalPassFailure(); + return; + } + // This pattern was intented to uses TFL QDQs to preserve the quantization // parameters from the TF Quant ops, thus this pattern should run with the // first `applyPatternsAndFoldGreedily` method, which would otherwise removes diff --git a/tensorflow/compiler/mlir/python/BUILD b/tensorflow/compiler/mlir/python/BUILD index 666f89ac72f..1189a926383 100644 --- a/tensorflow/compiler/mlir/python/BUILD +++ b/tensorflow/compiler/mlir/python/BUILD @@ -12,6 +12,22 @@ cc_library( "//tensorflow/c:tf_status_helper", "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", "//tensorflow/compiler/mlir/tensorflow:error_util", + # (yongtang) The graph_optimization_pass_registration needs to be part + # of a shared object that will be loaded whenever `import tensorflow` + # is run. The natural place is libtensorflow_framework.so. + # While adding graph_optimization_pass_registration to + # libtensorflow_framework.so is possible with some modification in + # dependency, many tests will fail due to multiple copies of LLVM. + # See https://github.com/tensorflow/tensorflow/pull/39231 for details. + # Alternatively, we place graph_optimization_pass_registration here + # because: + # - tensorflow/python/_pywrap_mlir.so already depends on LLVM anyway + # - tensorflow/python/_pywrap_mlir.so always loaded as part of python + # binding + # TODO: It might be still preferrable to place graph_optimization_pass + # as part of the libtensorflow_framework.so, as it is the central + # place for core related components. + "//tensorflow/compiler/mlir/tensorflow:graph_optimization_pass_registration", "//tensorflow/compiler/mlir/tensorflow:import_utils", "@llvm-project//llvm:support", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/mlir/python/mlir.cc b/tensorflow/compiler/mlir/python/mlir.cc index d0f6e015922..f22fb519a64 100644 --- a/tensorflow/compiler/mlir/python/mlir.cc +++ b/tensorflow/compiler/mlir/python/mlir.cc @@ -112,7 +112,7 @@ std::string ExperimentalConvertSavedModelV1ToMlir( // Convert the SavedModelBundle to an MLIR module. 
mlir::MLIRContext context; - auto module_or = ConvertSavedModelV1ToMlir(bundle, &context); + auto module_or = ConvertSavedModelV1ToMlir(bundle, {}, &context); if (!module_or.status().ok()) { Set_TF_Status_from_Status(status, module_or.status()); return "// error"; diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD b/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD new file mode 100644 index 00000000000..78f4312da46 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/BUILD @@ -0,0 +1,41 @@ +load("//tensorflow:tensorflow.bzl", "tf_python_pybind_extension") + +package(licenses = ["notice"]) + +tf_python_pybind_extension( + name = "mlir_wrapper", + srcs = [ + "attrs.cc", + "basic_classes.cc", + "builders.cc", + "mlir_wrapper.cc", + "mlir_wrapper.h", + "ops.cc", + "types.cc", + ], + module_name = "mlir_wrapper", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", + "//tensorflow/python:pybind11_lib", + "//tensorflow/python:pybind11_status", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@pybind11", + ], +) + +tf_python_pybind_extension( + name = "filecheck_wrapper", + srcs = ["filecheck_wrapper.cc"], + module_name = "filecheck_wrapper", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/python:pybind11_lib", + "//tensorflow/python:pybind11_status", + "@llvm-project//llvm:support", + "@pybind11", + ], +) diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc new file mode 100644 index 00000000000..ca7faf2e1d3 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/attrs.cc @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_attrs(py::module& m) { + py::class_(m, "Attribute"); + py::class_(m, "IntegerAttr") + .def("get", + py::overload_cast(&mlir::IntegerAttr::get)); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc new file mode 100644 index 00000000000..25adb44fe1d --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/basic_classes.cc @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/Support/FileCheck.h" +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_basic_classes(py::module& m) { + py::class_(m, "MLIRContext").def(py::init<>()); + + py::class_(m, "Location"); + + py::class_(m, "UnknownLoc") + .def("get", &mlir::UnknownLoc::get); + + py::class_(m, "Region") + .def("back", &mlir::Region::back, py::return_value_policy::reference) + .def("front", &mlir::Region::front, py::return_value_policy::reference) + .def("add_block", [](mlir::Region& r) { r.push_back(new mlir::Block); }) + .def("push_back", &mlir::Region::push_back) + .def("size", [](mlir::Region& r) { return r.getBlocks().size(); }) + .def("front", &mlir::Region::front, py::return_value_policy::reference); + py::class_(m, "Block_Iterator"); + py::class_(m, "Block") + .def("new", ([]() { return new mlir::Block; }), + py::return_value_policy::reference) + .def("end", &mlir::Block::end) + .def("addArgument", &mlir::Block::addArgument); + + py::class_(m, "Value").def("getType", &mlir::Value::getType); + py::class_(m, "OpResult"); + py::class_(m, "BlockArgument"); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc new file mode 100644 index 00000000000..338f17ed6df --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/builders.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/IR/Builders.h" // from @llvm-project + +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +void init_builders(py::module& m) { + py::class_(m, "Builder") + .def(py::init()) + .def("getFunctionType", + [](mlir::Builder& b, std::vector inputs, + std::vector outputs) { + return b.getFunctionType(llvm::ArrayRef(inputs), + llvm::ArrayRef(outputs)); + }); + py::class_(m, "OpBuilder") + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def("getUnknownLoc", &mlir::OpBuilder::getUnknownLoc) + .def("setInsertionPoint", + py::overload_cast( + &mlir::OpBuilder::setInsertionPoint)) + .def("saveInsertionPoint", &mlir::OpBuilder::saveInsertionPoint) + .def("restoreInsertionPoint", &mlir::OpBuilder::restoreInsertionPoint) + .def( + "createOperation", + [](mlir::OpBuilder& opb, mlir::OperationState& state) { + return opb.createOperation(state); + }, + py::return_value_policy::reference) + .def("getContext", &mlir::OpBuilder::getContext, + py::return_value_policy::reference); + + py::class_(m, "OpBuilder_InsertionPoint") + .def("getBlock", &mlir::OpBuilder::InsertPoint::getBlock); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc new file mode 100644 index 00000000000..8a841856b72 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/filecheck_wrapper.cc @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/Support/FileCheck.h" +#include "llvm/Support/SourceMgr.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "tensorflow/python/lib/core/pybind11_lib.h" +#include "tensorflow/python/lib/core/pybind11_status.h" + +PYBIND11_MODULE(filecheck_wrapper, m) { + m.def("check", [](std::string input, std::string check) { + llvm::FileCheckRequest fcr; + llvm::FileCheck fc(fcr); + llvm::SourceMgr SM = llvm::SourceMgr(); + SM.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(input), + llvm::SMLoc()); + SM.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(check), + llvm::SMLoc()); + llvm::Regex regex = fc.buildCheckPrefixRegex(); + fc.readCheckFile(SM, llvm::StringRef(check), regex); + return fc.checkInput(SM, llvm::StringRef(input)); + }); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc new file mode 100644 index 00000000000..6f468cd4267 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/python/lib/core/pybind11_lib.h" +#include "tensorflow/python/lib/core/pybind11_status.h" + +PYBIND11_MODULE(mlir_wrapper, m) { + m.def("registerDialects", []() { + mlir::registerDialect(); + mlir::registerDialect(); + mlir::registerDialect(); + }); + + init_basic_classes(m); + init_types(m); + init_builders(m); + init_ops(m); + init_attrs(m); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h new file mode 100644 index 00000000000..562c59b43e1 --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H +#define TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +void init_basic_classes(py::module& m); +void init_types(py::module& m); +void init_builders(py::module& m); +void init_ops(py::module& m); +void init_attrs(py::module& m); + +#endif // TENSORFLOW_COMPILER_MLIR_PYTHON_MLIR_WRAPPER_MLIR_WRAPPER_H diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc new file mode 100644 index 00000000000..4432829653e --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/ops.cc @@ -0,0 +1,194 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project + +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +void init_ops(py::module& m) { + py::class_>( + m, "Operation") + .def("getRegion", &mlir::Operation::getRegion, + py::return_value_policy::reference) + .def("getResult", &mlir::Operation::getResult) + .def("dump", &mlir::Operation::dump) + .def("getNumResults", &mlir::Operation::getNumResults); + + py::class_(m, "OperationState") + .def(py::init([](mlir::Location loc, std::string name) { + return mlir::OperationState(loc, llvm::StringRef(name)); + })) + .def("addTypes", + [](mlir::OperationState& state, std::vector tys) { + state.addTypes(mlir::ArrayRef(tys)); + }) + .def("addOperands", + [](mlir::OperationState& os, std::vector ops) { + os.addOperands(mlir::ArrayRef(ops)); + }) + .def("addRegion", py::overload_cast<>(&mlir::OperationState::addRegion), + py::return_value_policy::reference); + + py::class_(m, "ModuleOp") + .def("create", + [](mlir::Location loc) { return mlir::ModuleOp::create(loc); }) + .def("push_back", + [](mlir::ModuleOp& m, mlir::FuncOp f) { m.push_back(f); }) + .def("dump", &mlir::ModuleOp::dump) + .def("getAsStr", [](mlir::ModuleOp& m) { + std::string str; + llvm::raw_string_ostream os(str); + m.print(os); + return os.str(); + }); + + py::class_(m, "FuncOp") + .def("create", + [](mlir::Location location, std::string name, + mlir::FunctionType type) { + auto func = mlir::FuncOp::create(location, name, type); + func.addEntryBlock(); + return func; + }) + .def( + "getBody", + [](mlir::FuncOp& f) -> mlir::Region& { return f.getBody(); }, + py::return_value_policy::reference) + .def("getArguments", + [](mlir::FuncOp& f) { return f.getArguments().vec(); }) + .def("getName", [](mlir::FuncOp& f) { return f.getName().str(); }) + .def("getType", &mlir::FuncOp::getType); + + py::class_(m, "ReturnOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + std::vector values) -> mlir::Operation* { + return opb + .create(loc, + mlir::ArrayRef(values)) + .getOperation(); + }); + + // mlir::TF::AddOp + py::class_(m, "Tf_AddV2Op") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + py::class_(m, "Tf_AnyOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value input, + mlir::Value reduction_indices, + bool keep_dims = false) -> mlir::Operation* { + return opb + .create(loc, opb.getI1Type(), input, + reduction_indices, keep_dims) + .getOperation(); + }); + + // mlir::TF::ConstOp + py::class_(m, "Tf_ConstOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + mlir::Attribute value) -> mlir::Operation* { + return opb.create(loc, value).getOperation(); + }); + + // mlir::TF::EqualOp + py::class_(m, "Tf_EqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb + .create(loc, x, y, opb.getBoolAttr(true)) + .getOperation(); + }); + + // mlir::TF::GreaterEqualOp + py::class_(m, "Tf_GreaterEqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y) + 
.getOperation(); + }); + + // mlir::TF::GreaterOp + py::class_(m, "Tf_GreaterOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::LegacyCallOp + py::class_(m, "Tf_LegacyCallOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + std::vector output, std::vector args, + std::string f) -> mlir::Operation* { + return opb + .create( + loc, mlir::ArrayRef(output), + mlir::ArrayRef(args), mlir::StringRef(f)) + .getOperation(); + }); + + // mlir::TF::LessEqualOp + py::class_(m, "Tf_LessEqualOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::LessOp + py::class_(m, "Tf_LessOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); + + // mlir::TF::NegOp + py::class_(m, "Tf_NegOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, + mlir::Value x) -> mlir::Operation* { + return opb.create(loc, x).getOperation(); + }); + + py::class_(m, "Tf_NotEqualOp") + .def("create", [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) { + return opb + .create( + loc, x, y, mlir::BoolAttr::get(true, opb.getContext())) + .getOperation(); + }); + + // mlir::TF::SubOp + py::class_(m, "Tf_SubOp") + .def("create", + [](mlir::OpBuilder& opb, mlir::Location loc, mlir::Value x, + mlir::Value y) -> mlir::Operation* { + return opb.create(loc, x, y).getOperation(); + }); +} diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc new file mode 100644 index 00000000000..2be67f8e93e --- /dev/null +++ b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc @@ -0,0 +1,48 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" + +void init_types(py::module& m) { + // Type + py::class_ Type(m, "Type"); + Type.def("getKind", &mlir::Type::getKind); + + // Type Enums + py::enum_(Type, "StandardTypes_Kind") + .value("BF16", mlir::StandardTypes::BF16); + + // Type Sub-classes + py::class_(m, "FunctionType") + .def("getResults", + [](mlir::FunctionType& ft) { return ft.getResults().vec(); }); + + py::class_(m, "FloatType") + .def("get", &mlir::FloatType::get); + + py::class_(m, "IntegerType") + .def("get", py::overload_cast( + &mlir::IntegerType::get)); + + py::class_(m, "UnrankedTensorType") + .def("get", &mlir::UnrankedTensorType::get); + + py::class_(m, "RankedTensorType") + .def("get", [](std::vector shape, mlir::Type ty) { + return mlir::RankedTensorType::get(mlir::ArrayRef(shape), ty); + }); +} diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index 6d3131a781c..f1271d0da24 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -70,9 +70,9 @@ tool_dirs = config.mlir_tf_tools_dirs + [ ] tool_names = [ 'mlir-opt', 'mlir-translate', 'tf-opt', 'tf_tfl_translate', - 'flatbuffer_to_string', 'flatbuffer_translate', 'tf-mlir-translate', - 'mlir-tflite-runner', 'tfcompile', 'json_to_flatbuffer', 'xla-gpu-opt', - 'xla-opt' + 'tf_tfjs_translate', 'flatbuffer_to_string', 'flatbuffer_translate', + 'tf-mlir-translate', 'mlir-tflite-runner', 'tfcompile', + 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt' ] tools = [ToolSubst(s, unresolved='ignore') for s in tool_names] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/tensorflow/compiler/mlir/runlit.site.cfg.py b/tensorflow/compiler/mlir/runlit.site.cfg.py index 661e6200df3..3e7596c75d7 100644 --- a/tensorflow/compiler/mlir/runlit.site.cfg.py +++ b/tensorflow/compiler/mlir/runlit.site.cfg.py @@ -44,6 +44,7 @@ mlir_tf_tools_dirs = [ 'tensorflow/compiler/mlir', 'tensorflow/compiler/mlir/lite', 'tensorflow/compiler/mlir/tensorflow', + 'tensorflow/compiler/mlir/tfjs', 'tensorflow/compiler/mlir/xla', 'tensorflow/compiler/aot', 'tensorflow/compiler/xla/service/mlir_gpu', diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index d1fb4343d51..54b560ed6ce 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -36,7 +36,7 @@ filegroup( "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -342,6 +342,38 @@ cc_library( ], ) +gentbl( + name = "tf_data_optimization_inc_gen", + tbl_outs = [ + ( + "-gen-rewriters", + "transforms/generated_tf_data_optimization.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "transforms/tf_data_optimization.td", + td_srcs = [ + ":tensorflow_ops_td_files", + "@llvm-project//mlir:StdOpsTdFiles", + ], +) + +cc_library( + name = "tf_data_optimization", + srcs = [ + "transforms/tf_data_optimization.cc", + ], + hdrs = [ + "transforms/tf_data_optimization.h", + ], + deps = [ + ":tensorflow", + ":tensorflow_types", + 
":tf_data_optimization_inc_gen", + "@llvm-project//mlir:IR", + ], +) + cc_library( name = "unroll_batch_matmul_pass", srcs = [ @@ -406,10 +438,12 @@ cc_library( "transforms/tensor_array_ops_decomposition.cc", "transforms/tensor_list_ops_decomposition.cc", "transforms/test_side_effect_analysis.cc", + "transforms/tf_data_optimization_pass.cc", "transforms/tf_device_assignment.cc", "transforms/tpu_cluster_formation.cc", "transforms/tpu_dynamic_layout_pass.cc", "transforms/tpu_dynamic_padding_mapper.cc", + "transforms/tpu_extract_head_tail_outside_compilation.cc", "transforms/tpu_extract_outside_compilation.cc", "transforms/tpu_merge_variables_with_execute.cc", "transforms/tpu_rewrite_pass.cc", @@ -443,6 +477,7 @@ cc_library( ":tensorflow", ":tensorflow_optimize_inc_gen", ":tensorflow_types", + ":tf_data_optimization", ":tpu_rewrite_device_util", ":translate_utils", ":unroll_batch_matmul_pass", @@ -521,7 +556,7 @@ cc_library( deps = [ ":tensorflow", "@llvm-project//mlir:IR", - "@llvm-project//mlir:LoopOpsTransforms", + "@llvm-project//mlir:SCFTransforms", ], alwayslink = 1, ) @@ -599,7 +634,6 @@ cc_library( ":error_util", ":parse_text_proto", "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/strings", "@llvm-project//llvm:support", ], @@ -789,6 +823,7 @@ cc_library( ":mangling_util", ":tensorflow_attributes", ":tensorflow_types", + "//tensorflow/compiler/xla:util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -811,8 +846,10 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", "//tensorflow/stream_executor/lib", "@llvm-project//mlir:IR", ], @@ -1038,7 +1075,7 @@ genrule( srcs = [ "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", "@llvm-project//mlir:include/mlir/IR/OpBase.td", "ir/tf_generated_ops.td", "ir/tf_op_base.td", @@ -1111,6 +1148,7 @@ COMPILE_MLIR_UTIL_DEPS = [ "//tensorflow/stream_executor/lib", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:hlo", + ":convert_tensor", ] # Prefer to link 'compile_mlir_util' library that also links necessary diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 89d40566b29..aa1601c4032 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -160,6 +160,8 @@ def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastable TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let hasCanonicalizer = 1; + + let hasFolder = 1; } def TF_AllOp : TF_Op<"All", [NoSideEffect]> { @@ -190,6 +192,44 @@ retained with length 1. let verifier = [{ return Verify(*this); }]; } +def TF_AllToAllOp : TF_Op<"AllToAll", [NoSideEffect]> { + let summary = "An Op to exchange data across TPU replicas."; + + let description = [{ +On each replica, the input is split into `split_count` blocks along +`split_dimension` and send to the other replicas given group_assignment. 
After +receiving `split_count` - 1 blocks from other replicas, we concatenate the +blocks along `concat_dimension` as the output. + +For example, suppose there are 2 TPU replicas: +replica 0 receives input: `[[A, B]]` +replica 1 receives input: `[[C, D]]` + +group_assignment=`[[0, 1]]` +concat_dimension=0 +split_dimension=1 +split_count=2 + +replica 0's output: `[[A], [C]]` +replica 1's output: `[[B], [D]]` + }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32, F64, I1, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$input, + I32Tensor:$group_assignment, + + I64Attr:$concat_dimension, + I64Attr:$split_dimension, + I64Attr:$split_count + ); + + let results = (outs + TensorOf<[BF16, F16, F32, F64, I1, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_AngleOp : TF_Op<"Angle", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "Returns the argument of a complex number."; @@ -1063,6 +1103,26 @@ for dtype in dtype_list: TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_BroadcastArgsOp : TF_Op<"BroadcastArgs", [NoSideEffect]> { + let summary = "Return the shape of s0 op s1 with broadcast."; + + let description = [{ +Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the +broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$s0, + TF_I32OrI64Tensor:$s1 + ); + + let results = (outs + TF_I32OrI64Tensor:$r0 + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_BroadcastGradientArgsOp : TF_Op<"BroadcastGradientArgs", [NoSideEffect]> { let summary = [{ Return the reduction indices for computing gradients of s0 op s1 with broadcast. @@ -1195,7 +1255,7 @@ that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect, SameOperandsAndResultType]> { +def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect]> { let summary = "Clips tensor values to a specified min and max."; let description = [{ @@ -1386,6 +1446,30 @@ tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] let hasCanonicalizer = 1; } +def TF_ConjugateTransposeOp : TF_Op<"ConjugateTranspose", [NoSideEffect]> { + let summary = [{ +Shuffle dimensions of x according to a permutation and conjugate the result. + }]; + + let description = [{ +The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: + `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` + `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` + }]; + + let arguments = (ins + TF_Tensor:$x, + TF_I32OrI64Tensor:$perm + ); + + let results = (outs + TF_Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tperm = TF_DerivedOperandTypeAttr<1>; +} + def TF_Conv2DOp : TF_Op<"Conv2D", [NoSideEffect, TF_LayoutSensitiveInterface]> { let summary = [{ Computes a 2-D convolution given 4-D `input` and `filter` tensors. 
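For the BroadcastArgs op registered above, a quick sketch of its behaviour through tf.raw_ops (illustrative only): given two shape vectors, it returns the NumPy-style broadcasted shape.

import tensorflow as tf

s0 = tf.constant([2, 1, 3], dtype=tf.int32)
s1 = tf.constant([5, 3], dtype=tf.int32)

# Shapes are aligned from the right; size-1 dimensions broadcast.
r0 = tf.raw_ops.BroadcastArgs(s0=s0, s1=s1)
print(r0.numpy())   # [2 5 3]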
@@ -1660,7 +1744,28 @@ Given an input tensor, this function computes hyperbolic cosine of every TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_CrossReplicaSumOp : TF_Op<"CrossReplicaSum", [AllTypesMatch<["input", "output"]>, NoSideEffect]> { +def TF_CrossOp : TF_Op<"Cross", [NoSideEffect]> { + let summary = "Compute the pairwise cross product."; + + let description = [{ +`a` and `b` must be the same shape; they can either be simple 3-element vectors, +or any shape where the innermost dimension is 3. In the latter case, each pair +of corresponding 3-element vectors is cross-multiplied independently. + }]; + + let arguments = (ins + TF_IntOrFpTensor:$a, + TF_IntOrFpTensor:$b + ); + + let results = (outs + TF_IntOrFpTensor:$product + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_CrossReplicaSumOp : TF_Op<"CrossReplicaSum", [NoSideEffect, TF_AllTypesMatch<["input", "output"]>]> { let summary = "An Op to sum inputs across replicated TPU instances."; let description = [{ @@ -1684,7 +1789,7 @@ and `B, D, F, H` as group 1. Thus we get the outputs: TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_CumsumOp : TF_Op<"Cumsum", [AllTypesMatch<["x", "out"]>, NoSideEffect]> { +def TF_CumsumOp : TF_Op<"Cumsum", [NoSideEffect, TF_AllTypesMatch<["x", "out"]>]> { let summary = "Compute the cumulative sum of the tensor `x` along `axis`."; let description = [{ @@ -1734,6 +1839,169 @@ tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; } +def TF_DataFormatDimMapOp : TF_Op<"DataFormatDimMap", [NoSideEffect, SameOperandsAndResultType]> { + let summary = [{ +Returns the dimension index in the destination data format given the one in + }]; + + let description = [{ +the source data format. + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$x, + + DefaultValuedAttr:$src_format, + DefaultValuedAttr:$dst_format + ); + + let results = (outs + TF_I32OrI64Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_DecodeAndCropJpegOp : TF_Op<"DecodeAndCropJpeg", [NoSideEffect]> { + let summary = "Decode and Crop a JPEG-encoded image to a uint8 tensor."; + + let description = [{ +The attr `channels` indicates the desired number of color channels for the +decoded image. + +Accepted values are: + +* 0: Use the number of channels in the JPEG-encoded image. +* 1: output a grayscale image. +* 3: output an RGB image. + +If needed, the JPEG-encoded image is transformed to match the requested number +of color channels. + +The attr `ratio` allows downscaling the image by an integer factor during +decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +downscaling the image later. + + +It is equivalent to a combination of decode and crop, but much faster by only +decoding partial jpeg image. + }]; + + let arguments = (ins + TF_StrTensor:$contents, + I32Tensor:$crop_window, + + DefaultValuedAttr:$channels, + DefaultValuedAttr:$ratio, + DefaultValuedAttr:$fancy_upscaling, + DefaultValuedAttr:$try_recover_truncated, + DefaultValuedAttr:$acceptable_fraction, + StrAttr:$dct_method + ); + + let results = (outs + TF_Uint8Tensor:$image + ); +} + +def TF_DecodeGifOp : TF_Op<"DecodeGif", [NoSideEffect]> { + let summary = "Decode the frame(s) of a GIF-encoded image to a uint8 tensor."; + + let description = [{ +GIF images with frame or transparency compression are not supported. 
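For the DecodeAndCropJpeg op added above, a usage sketch through tf.io.decode_and_crop_jpeg (illustrative only; "photo.jpg" is a placeholder path): the crop window is decoded directly, which is documented to be equivalent to decoding the whole image and slicing, only cheaper.

import tensorflow as tf

contents = tf.io.read_file("photo.jpg")   # placeholder path
crop_window = [10, 20, 100, 200]          # [crop_y, crop_x, crop_height, crop_width]

# Decode only the requested window instead of the full image.
cropped = tf.io.decode_and_crop_jpeg(contents, crop_window, channels=3)

# Equivalent (but slower): decode everything, then slice the same window.
full = tf.io.decode_jpeg(contents, channels=3)
also_cropped = full[10:110, 20:220, :]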
+On Linux and MacOS systems, convert animated GIFs from compressed to +uncompressed by running: + + convert $src.gif -coalesce $dst.gif + +This op also supports decoding JPEGs and PNGs, though it is cleaner to use +`tf.io.decode_image`. + }]; + + let arguments = (ins + TF_StrTensor:$contents + ); + + let results = (outs + TF_Uint8Tensor:$image + ); +} + +def TF_DecodeJpegOp : TF_Op<"DecodeJpeg", [NoSideEffect]> { + let summary = "Decode a JPEG-encoded image to a uint8 tensor."; + + let description = [{ +The attr `channels` indicates the desired number of color channels for the +decoded image. + +Accepted values are: + +* 0: Use the number of channels in the JPEG-encoded image. +* 1: output a grayscale image. +* 3: output an RGB image. + +If needed, the JPEG-encoded image is transformed to match the requested number +of color channels. + +The attr `ratio` allows downscaling the image by an integer factor during +decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +downscaling the image later. + + +This op also supports decoding PNGs and non-animated GIFs since the interface is +the same, though it is cleaner to use `tf.io.decode_image`. + }]; + + let arguments = (ins + TF_StrTensor:$contents, + + DefaultValuedAttr:$channels, + DefaultValuedAttr:$ratio, + DefaultValuedAttr:$fancy_upscaling, + DefaultValuedAttr:$try_recover_truncated, + DefaultValuedAttr:$acceptable_fraction, + StrAttr:$dct_method + ); + + let results = (outs + TF_Uint8Tensor:$image + ); +} + +def TF_DecodePngOp : TF_Op<"DecodePng", [NoSideEffect]> { + let summary = "Decode a PNG-encoded image to a uint8 or uint16 tensor."; + + let description = [{ +The attr `channels` indicates the desired number of color channels for the +decoded image. + +Accepted values are: + +* 0: Use the number of channels in the PNG-encoded image. +* 1: output a grayscale image. +* 3: output an RGB image. +* 4: output an RGBA image. + +If needed, the PNG-encoded image is transformed to match the requested number +of color channels. + +This op also supports decoding JPEGs and non-animated GIFs since the interface +is the same, though it is cleaner to use `tf.io.decode_image`. + }]; + + let arguments = (ins + TF_StrTensor:$contents, + + DefaultValuedAttr:$channels + ); + + let results = (outs + TensorOf<[TF_Uint16, TF_Uint8]>:$image + ); + + TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; +} + def TF_DepthToSpaceOp : TF_Op<"DepthToSpace", [NoSideEffect]> { let summary = "DepthToSpace for tensors of type T."; @@ -1963,6 +2231,8 @@ def TF_DivOp : TF_Op<"Div", [NoSideEffect, ResultsBroadcastableShape]>, TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let hasCanonicalizer = 1; + + let hasFolder = 1; } def TF_DivNoNanOp : TF_Op<"DivNoNan", [NoSideEffect, ResultsBroadcastableShape]>, @@ -2195,6 +2465,51 @@ See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_EluGradOp : TF_Op<"EluGrad", [NoSideEffect, SameOperandsAndResultType]> { + let summary = [{ +Computes gradients for the exponential linear (Elu) operation. + }]; + + let description = [{ + }]; + + let arguments = (ins + TF_FpTensor:$gradients, + TF_FpTensor:$outputs + ); + + let results = (outs + TF_FpTensor:$backprops + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_EmptyOp : TF_Op<"Empty", []> { + let summary = [{ +Creates a tensor with the given shape. + +This operation creates a tensor of `shape` and `dtype`. 
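A short sketch of the Empty op summarized above, via tf.raw_ops (illustrative only): it allocates a tensor of the requested shape; with init=True the contents are set to the dtype's default value (zeros for float32), otherwise they are left uninitialized.

import tensorflow as tf

# Allocate a 2x3 float32 tensor and zero-initialize it.
zeros = tf.raw_ops.Empty(shape=[2, 3], dtype=tf.float32, init=True)
print(zeros.numpy())   # [[0. 0. 0.] [0. 0. 0.]]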
+ }]; + + let description = [{ + }]; + + let arguments = (ins + I32Tensor:$shape, + + DefaultValuedAttr:$init + ); + + let results = (outs + TF_Tensor:$output + ); + + TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; + + let hasFolder = 1; +} + def TF_EqualOp : TF_Op<"Equal", [Commutative, NoSideEffect]> { let summary = "Returns the truth value of (x == y) element-wise."; @@ -2592,6 +2907,8 @@ fill([2, 3], 9) ==> [[9, 9, 9] return Verify(*this); }]; + let hasFolder = 1; + let builders = [OpBuilder< "OpBuilder &builder, OperationState &result, Value dims, Value value" >]; @@ -3024,8 +3341,8 @@ Gather slices from `params` axis `axis` according to `indices`. let description = [{ `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -Produces an output tensor with shape `params.shape[:axis] + indices.shape + -params.shape[axis + 1:]` where: +Produces an output tensor with shape `params.shape[:axis] + +indices.shape[batch_dims:] + params.shape[axis + 1:]` where: ```python # Scalar indices (output is rank(params) - 1). @@ -3597,6 +3914,28 @@ def TF_LeakyReluOp : TF_Op<"LeakyRelu", [NoSideEffect, SameOperandsAndResultType let hasFolder = 1; } +def TF_LeakyReluGradOp : TF_Op<"LeakyReluGrad", [NoSideEffect, SameOperandsAndResultType]> { + let summary = [{ +Computes rectified linear gradients for a LeakyRelu operation. + }]; + + let description = [{ + }]; + + let arguments = (ins + TF_FpTensor:$gradients, + TF_FpTensor:$features, + + DefaultValuedAttr:$alpha + ); + + let results = (outs + TF_FpTensor:$backprops + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_LeftShiftOp : TF_Op<"LeftShift", [NoSideEffect, ResultsBroadcastableShape]>, WithBroadcastableBinOpBuilder { let summary = "Elementwise computes the bitwise left-shift of `x` and `y`."; @@ -3988,7 +4327,7 @@ cublas. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MatrixBandPartOp : TF_Op<"MatrixBandPart", [AllTypesMatch<["input", "band"]>, NoSideEffect]> { +def TF_MatrixBandPartOp : TF_Op<"MatrixBandPart", [NoSideEffect, TF_AllTypesMatch<["input", "band"]>]> { let summary = [{ Copy a tensor setting everything outside a central band in each innermost matrix to zero. }]; @@ -4895,7 +5234,7 @@ func @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> @tf.function def foo(x, y): - return = mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32]) + return mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32]) graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.TensorSpec([10], tf.float32)).graph.as_graph_def() ``` @@ -4960,6 +5299,8 @@ def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShap ); TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + + let hasFolder = 1; } def TF_MulNoNanOp : TF_Op<"MulNoNan", [NoSideEffect, ResultsBroadcastableShape]>, @@ -4974,12 +5315,12 @@ Returns x * y element-wise. Returns zero if y is zero, even if x if infinite or }]; let arguments = (ins - TensorOf<[F16, F32, F64, TF_Complex128, TF_Complex64]>:$x, - TensorOf<[F16, F32, F64, TF_Complex128, TF_Complex64]>:$y + TF_FpOrComplexTensor:$x, + TF_FpOrComplexTensor:$y ); let results = (outs - TensorOf<[F16, F32, F64, TF_Complex128, TF_Complex64]>:$z + TF_FpOrComplexTensor:$z ); TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; @@ -6032,6 +6373,29 @@ is the corresponding input gradient. 
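The corrected GatherV2 shape formula above, params.shape[:axis] + indices.shape[batch_dims:] + params.shape[axis + 1:], can be spot-checked with a small example (illustrative only):

import tensorflow as tf

params = tf.zeros([2, 3, 5, 7])
indices = tf.zeros([2, 4], dtype=tf.int32)   # first dim matches the batch dim

out = tf.gather(params, indices, axis=2, batch_dims=1)
# params.shape[:2] + indices.shape[1:] + params.shape[3:]
#   = [2, 3] + [4] + [7]
print(out.shape)   # (2, 3, 4, 7)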
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_RecvTPUEmbeddingActivationsOp : TF_Op<"RecvTPUEmbeddingActivations", []> { + let summary = "An op that receives embedding activations on the TPU."; + + let description = [{ +The TPU system performs the embedding lookups and aggregations specified by +the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The +results of these aggregations are visible to the Tensorflow Graph as the +outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing +one Tensor of activations per table specified in the model. There can be at +most one RecvTPUEmbeddingActivations op in the TPU graph. + }]; + + let arguments = (ins + StrAttr:$config + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedResultSizeAttr num_outputs = TF_DerivedResultSizeAttr<0>; +} + def TF_ReluOp : TF_Op<"Relu", [NoSideEffect, SameOperandsAndResultType, TF_LayoutAgnostic]> { let summary = "Computes rectified linear: `max(features, 0)`."; @@ -6070,6 +6434,24 @@ def TF_Relu6Op : TF_Op<"Relu6", [NoSideEffect, SameOperandsAndResultType]> { TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_Relu6GradOp : TF_Op<"Relu6Grad", [NoSideEffect, SameOperandsAndResultType]> { + let summary = "Computes rectified linear 6 gradients for a Relu6 operation."; + + let description = [{ + }]; + + let arguments = (ins + TF_IntOrFpTensor:$gradients, + TF_IntOrFpTensor:$features + ); + + let results = (outs + TF_IntOrFpTensor:$backprops + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_ReluGradOp : TF_Op<"ReluGrad", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes rectified linear gradients for a Relu operation."; @@ -7077,6 +7459,52 @@ def TF_SelectV2Op : TF_Op<"SelectV2", [NoSideEffect]> { ]; } +def TF_SeluOp : TF_Op<"Selu", [NoSideEffect, SameOperandsAndResultType]> { + let summary = [{ +Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` + }]; + + let description = [{ +if < 0, `scale * features` otherwise. + +To be used together with +`initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`. +For correct dropout, use `tf.contrib.nn.alpha_dropout`. + +See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) + }]; + + let arguments = (ins + TF_FpTensor:$features + ); + + let results = (outs + TF_FpTensor:$activations + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + +def TF_SeluGradOp : TF_Op<"SeluGrad", [NoSideEffect, SameOperandsAndResultType]> { + let summary = [{ +Computes gradients for the scaled exponential linear (Selu) operation. + }]; + + let description = [{ + }]; + + let arguments = (ins + TF_FpTensor:$gradients, + TF_FpTensor:$outputs + ); + + let results = (outs + TF_FpTensor:$backprops + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_ShapeOp : TF_Op<"Shape", [NoSideEffect]> { let summary = "Returns the shape of a tensor."; @@ -7715,6 +8143,26 @@ I.e., \\(y = \sqrt{x} = x^{1/2}\\). TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_SqrtGradOp : TF_Op<"SqrtGrad", [NoSideEffect, SameOperandsAndResultType]> { + let summary = "Computes the gradient for the sqrt of `x` wrt its input."; + + let description = [{ +Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` +is the corresponding input gradient. 
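The SqrtGrad formula quoted above, grad = dy * 0.5 / y with y = sqrt(x), can be verified end to end with a gradient tape; a minimal sketch, illustrative only:

import tensorflow as tf

x = tf.constant([1.0, 4.0, 9.0])
dy = tf.constant([1.0, 1.0, 1.0])

with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.sqrt(x)

# The registered gradient matches dy * 0.5 / y.
grad = tape.gradient(y, x, output_gradients=dy)
tf.debugging.assert_near(grad, dy * 0.5 / y)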
+ }]; + + let arguments = (ins + TF_FpOrComplexTensor:$y, + TF_FpOrComplexTensor:$dy + ); + + let results = (outs + TF_FpOrComplexTensor:$z + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_SquareOp : TF_Op<"Square", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes square of x element-wise."; @@ -8096,6 +8544,8 @@ def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape]>, TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; let hasCanonicalizer = 1; + + let hasFolder = 1; } def TF_SumOp : TF_Op<"Sum", [NoSideEffect]> { @@ -9270,6 +9720,30 @@ y + truncate_mod(x, y) = x`. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_TruncatedNormalOp : TF_Op<"TruncatedNormal", []> { + let summary = "Outputs random values from a truncated normal distribution."; + + let description = [{ +The generated values follow a normal distribution with mean 0 and standard +deviation 1, except that values whose magnitude is more than 2 standard +deviations from the mean are dropped and re-picked. + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TF_FpTensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; +} + def TF_UniqueOp : TF_Op<"Unique", [NoSideEffect]> { let summary = "Finds unique elements in a 1-D tensor."; @@ -9855,6 +10329,33 @@ https://www.tensorflow.org/xla/operation_semantics#gather TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_XlaHostComputeOp : TF_Op<"XlaHostCompute", []> { + let summary = [{ +A pseudo-op to represent host-side computation in an XLA program. + }]; + + let description = [{ + }]; + + let arguments = (ins + Variadic:$inputs, + + StrArrayAttr:$ancestors, + TF_ShapeAttrArray:$shapes, + SymbolRefAttr:$shape_inference_graph, + StrAttr:$key, + DefaultValuedAttr:$cost_estimate_ns, + DefaultValuedAttr:$tpu_core + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedOperandTypeListAttr Tinputs = TF_DerivedOperandTypeListAttr<0>; + TF_DerivedResultTypeListAttr Toutputs = TF_DerivedResultTypeListAttr<0>; +} + def TF_XlaKeyValueSortOp : TF_Op<"XlaKeyValueSort", [NoSideEffect]> { let summary = "Wraps the XLA Sort operator, documented at"; @@ -9903,6 +10404,24 @@ https://www.tensorflow.org/performance/xla/operation_semantics#pad TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_XlaRecvFromHostOp : TF_Op<"XlaRecvFromHost", []> { + let summary = "An op to receive a tensor from the host."; + + let description = [{ + }]; + + let arguments = (ins + TF_ShapeAttr:$shape, + StrAttr:$key + ); + + let results = (outs + TF_Tensor:$output + ); + + TF_DerivedResultTypeAttr Toutput = TF_DerivedResultTypeAttr<0>; +} + def TF_XlaReduceOp : TF_Op<"XlaReduce", [NoSideEffect]> { let summary = "Wraps the XLA Reduce operator, documented at"; @@ -9967,6 +10486,23 @@ i=0...N-1. 
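For the TruncatedNormal op registered above, a usage sketch via the public wrapper tf.random.truncated_normal (illustrative only): samples follow a standard normal, and any draw more than two standard deviations from the mean is discarded and re-drawn.

import tensorflow as tf

samples = tf.random.truncated_normal([10000], mean=0.0, stddev=1.0, seed=42)

# Every sample lies within two standard deviations of the mean.
assert float(tf.reduce_max(tf.abs(samples))) <= 2.0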
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_XlaSendToHostOp : TF_Op<"XlaSendToHost", []> { + let summary = "An op to send a tensor to the host."; + + let description = [{ + }]; + + let arguments = (ins + TF_Tensor:$input, + + StrAttr:$key + ); + + let results = (outs); + + TF_DerivedOperandTypeAttr Tinput = TF_DerivedOperandTypeAttr<0>; +} + def TF_XlaSvdOp : TF_Op<"XlaSvd", [NoSideEffect]> { let summary = [{ Computes the eigen decomposition of a batch of self-adjoint matrices @@ -10050,6 +10586,29 @@ def TF_ZerosLikeOp : TF_Op<"ZerosLike", [NoSideEffect, SameOperandsAndResultType TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF__RecvTPUEmbeddingActivationsOp : TF_Op<"_RecvTPUEmbeddingActivations", []> { + let summary = "An op that receives embeddng activations on the TPU."; + + let description = [{ +The TPU system performs the embedding lookups and aggregations. The results of +these aggregations are visible to the Tensorflow Graph as the outputs of a +_RecvTPUEmbeddingActivations Op. This op returns a list containing one +Tensor of activations per table specified in the model. + }]; + + let arguments = (ins + TF_VariantTensor:$deduplication_data, + + StrAttr:$config + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedResultSizeAttr num_tables = TF_DerivedResultSizeAttr<0>; +} + def TF__TPUCompileMlirOp : TF_Op<"_TPUCompileMlir", []> { let summary = [{ Compiles a computations for execution on one or more TPU devices. @@ -10085,3 +10644,44 @@ used to look up the program in the compilation cache. TF_DerivedResultSizeAttr num_computations = TF_DerivedResultSizeAttr<1>; TF_DerivedOperandSizeAttr NumDynamicShapes = TF_DerivedOperandSizeAttr<0>; } + +def TF__XlaRecvAtHostOp : TF_Op<"_XlaRecvAtHost", []> { + let summary = [{ +A placeholder op to receive values from a running XLA computation. + }]; + + let description = [{ + }]; + + let arguments = (ins + TF_StrTensor:$dynamic_key, + + StrAttr:$key, + I64Attr:$device_ordinal + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedResultTypeListAttr Toutputs = TF_DerivedResultTypeListAttr<0>; +} + +def TF__XlaSendFromHostOp : TF_Op<"_XlaSendFromHost", []> { + let summary = "A placeholder op to send values to a running XLA computation."; + + let description = [{ + }]; + + let arguments = (ins + Variadic:$inputs, + TF_StrTensor:$dynamic_key, + + StrAttr:$key, + I64Attr:$device_ordinal + ); + + let results = (outs); + + TF_DerivedOperandTypeListAttr Tinputs = TF_DerivedOperandTypeListAttr<0>; +} diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 80a2b1925e6..dbd8ab0fae2 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -23,7 +23,7 @@ limitations under the License. 
#define TF_OP_BASE include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.td" //===----------------------------------------------------------------------===// @@ -70,6 +70,16 @@ class TF_OpIsBroadcastableToRes : And<[ "$_op.getOperand(" # opId # ").getType(), " "$_op.getResult(" # resId # ").getType())">]>; + +class TF_AllTypesMatchPred values> : + CPred<"TF::AreCastCompatible(llvm::makeArrayRef({"# StrJoin.result #"}))">; + +class TF_AllTypesMatch names> : + PredOpTrait< + "all of {" # StrJoin.result # "} have dynamically equal types ", + TF_AllTypesMatchPred< + !foreach(n, names, !subst("$_self", "$" # n, "$_self.getType()"))>>; + //===----------------------------------------------------------------------===// // TensorFlow op definitions //===----------------------------------------------------------------------===// @@ -129,9 +139,16 @@ def TF_I32Or64 : SignlessIntOfWidths<[32, 64]>; def TF_I32OrI64Tensor : TensorOf<[TF_I32Or64]>; def TF_Uint8 : UI<8>; +def TF_Uint8Tensor : TensorOf<[TF_Uint8]>; + def TF_Uint16 : UI<16>; +def TF_Uint16Tensor : TensorOf<[TF_Uint16]>; + def TF_Uint32 : UI<32>; +def TF_Uint32Tensor : TensorOf<[TF_Uint32]>; + def TF_Uint64 : UI<64>; +def TF_Uint64Tensor : TensorOf<[TF_Uint64]>; // Any unsigned integer type def TF_UInt : UnsignedIntOfWidths<[8, 16, 32, 64]>; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 1b915e3d5fc..2007824369c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" @@ -34,6 +35,7 @@ limitations under the License. #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/Dialect/Traits.h" // from @llvm-project @@ -108,47 +110,6 @@ static inline bool HasRankAtMost(Value value, int64_t rank) { return !type || type.getRank() <= rank; } -// Returns true if the given pair of TensorFlow types can be cast to one -// another. In other words, a single run-time value is legal for both the types. -// For example, tensor<*xf32> and tensor<3xf32> are cast compatible. -static bool AreCastCompatible(Type a, Type b) { - if (TensorCastOp::areCastCompatible(a, b)) return true; - - // Resource types may optionally contain subtypes information that does not - // match. Check subtypes compatibility when possible, otherwise treat them as - // compatible. 
- auto a_or_element_type = getElementTypeOrSelf(a); - auto b_or_element_type = getElementTypeOrSelf(b); - - auto a_kind = a_or_element_type.getKind(); - auto b_kind = b_or_element_type.getKind(); - - if (a_kind == TensorFlowTypes::RESOURCE && - b_kind == TensorFlowTypes::RESOURCE) { - auto a_resource_type = a_or_element_type.dyn_cast(); - auto b_resource_type = b_or_element_type.dyn_cast(); - bool a_has_subtype = !a_resource_type.getSubtypes().empty(); - bool b_has_subtype = !b_resource_type.getSubtypes().empty(); - - if (!a_has_subtype || !b_has_subtype) return true; - - assert(a_resource_type.getSubtypes().size() <= 1 && - "Resource type must have at most one subtype"); - assert(b_resource_type.getSubtypes().size() <= 1 && - "Resource type must have at most one subtype"); - - return TensorCastOp::areCastCompatible( - a_resource_type.getSubtypes().front(), - b_resource_type.getSubtypes().front()); - } - - // Variant types may optionally contain subtypes information that need not - // match. It is also not possible to compare subtypes for compatibility as - // their interpretation depends on the ops operating on them. So, accept all - // pairs of variant types. - return a_kind == TensorFlowTypes::VARIANT && - b_kind == TensorFlowTypes::VARIANT; -} static bool IsUnknownDimOrRank(int64_t dim_or_rank) { return dim_or_rank == -1; @@ -494,6 +455,57 @@ LogicalResult FoldOperandsPermutation( return success(); } +//===----------------------------------------------------------------------===// +// Rewrite Pattern for removing trivial Arithmetic op. +//===----------------------------------------------------------------------===// + +namespace { +// Folder that returns LHS of an Arithmetic Op if the RHS is a constant +// known to be Identity (e.g X+0) +template ::value>::type * = nullptr> +OpFoldResult IdentityArithmeticOpFolder(OpT arithmetic_op, + ArrayRef operands) { + auto result_op_type = arithmetic_op.getResult().getType(); + auto lhs_type = arithmetic_op.x().getType().template cast(); + if (!result_op_type.template cast().hasStaticShape()) return {}; + + // We only handle non-broadcastable case. + if (result_op_type != lhs_type) { + return {}; + } + + // Mul and Div ops have identity value one while AddV2 and SubOp have identity + // value zero. 
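As a minimal, hedged sketch of the IR-level effect of this folder (the function name @identity_fold_example and the 2x2 shapes are illustrative; the RemoveTrivial* cases added to constant-fold.mlir later in this change exercise the same behavior):

// Illustrative only: x + 0 and x * 1 fold to x when the operand and result
// types match exactly (no broadcast) and the shape is static.
func @identity_fold_example(%arg0: tensor<2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) {
  %zero = "tf.Const"() {value = dense<0.0> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
  %one = "tf.Const"() {value = dense<1.0> : tensor<2x2xf32>} : () -> tensor<2x2xf32>
  // Folds to %arg0: AddV2 with an all-zero RHS of identical static type.
  %0 = "tf.AddV2"(%arg0, %zero) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
  // Folds to %arg0: Mul with an all-one RHS of identical static type.
  %1 = "tf.Mul"(%arg0, %one) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
  return %0, %1 : tensor<2x2xf32>, tensor<2x2xf32>
}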
+ int identity = + (std::is_same::value || std::is_same::value); + + Type element_ty = lhs_type.getElementType(); + Attribute identity_attr; + if (auto ty = element_ty.template dyn_cast()) { + identity_attr = FloatAttr::get(ty, static_cast(identity)); + } else if (auto ty = element_ty.template dyn_cast()) { + identity_attr = IntegerAttr::get(ty, static_cast(identity)); + } else { + return {}; + } + + if (auto attr = operands[1].dyn_cast_or_null()) { + if (attr.isSplat() && attr.getSplatValue() == identity_attr) + return arithmetic_op.x(); + } + + bool is_symmetric = + (std::is_same::value || std::is_same::value); + if (auto attr = operands[0].dyn_cast_or_null()) { + if (is_symmetric && attr.isSplat() && attr.getSplatValue() == identity_attr) + return arithmetic_op.y(); + } + return {}; +} +} // namespace + namespace { #include "tensorflow/compiler/mlir/tensorflow/transforms/generated_canonicalize.inc" } // namespace @@ -525,6 +537,10 @@ void AddV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +OpFoldResult AddV2Op::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + //===----------------------------------------------------------------------===// // AllOp //===----------------------------------------------------------------------===// @@ -927,20 +943,17 @@ void ConstOp::build(OpBuilder &builder, OperationState &result, Type type, LogicalResult ConstOp::inferReturnTypes( MLIRContext *context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl &inferredReturnTypes) { - for (NamedAttribute named_attr : attributes) { - if (named_attr.first.strref() != "value") continue; - auto value = named_attr.second; - if (auto elem_attr = value.dyn_cast()) { - inferredReturnTypes.assign({elem_attr.getType()}); - return success(); - } - return emitOptionalError(location, - "attribute 'value' failed to satisfy constraint: " - "constant vector/tensor"); + auto value = attributes.get("value"); + if (!value) return emitOptionalError(location, "missing attribute 'value'"); + if (auto elem_attr = value.dyn_cast()) { + inferredReturnTypes.assign({elem_attr.getType()}); + return success(); } - return emitOptionalError(location, "missing attribute 'value'"); + return emitOptionalError(location, + "attribute 'value' failed to satisfy constraint: " + "constant vector/tensor"); } //===----------------------------------------------------------------------===// @@ -1271,6 +1284,10 @@ void DivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +OpFoldResult DivOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + //===----------------------------------------------------------------------===// // DynamicStitchOp //===----------------------------------------------------------------------===// @@ -1355,7 +1372,7 @@ static LogicalResult Verify(DynamicStitchOp op) { auto expected_out_ty = RankedTensorType::get(expected_shape, out_ty.getElementType()); - if (!AreCastCompatible(out_ty, expected_out_ty)) { + if (!AreCastCompatible({out_ty, expected_out_ty})) { return op.emitOpError() << "has invalid output type; should be " "compatible with inferred type " << expected_out_ty; @@ -1381,6 +1398,43 @@ static LogicalResult Verify(EinsumOp op) { return success(); } +//===----------------------------------------------------------------------===// +// EmptyOp 
+//===----------------------------------------------------------------------===// + +OpFoldResult EmptyOp::fold(ArrayRef operands) { + assert(operands.size() == 1 && "empty op has one operand"); + + Attribute attr = operands.front(); + if (!attr) return {}; + + auto int_attr = attr.cast(); + SmallVector out_shape; + for (const auto val : int_attr.getValues()) { + out_shape.push_back(val); + } + + auto type = getResult().getType().cast(); + auto etype = type.getElementType(); + + // We can not fold if the result is not static. + if (!type.hasStaticShape()) return {}; + + if (auto float_type = etype.dyn_cast()) { + auto out_type = RankedTensorType::get(out_shape, float_type); + return DenseElementsAttr::get(out_type, + {APFloat(float_type.getFloatSemantics())}); + } + + if (auto int_type = etype.dyn_cast()) { + auto out_type = RankedTensorType::get(out_shape, etype); + APInt val(int_type.getWidth(), 0, int_type.getSignedness()); + return DenseElementsAttr::get(out_type, val); + } + + return {}; +} + //===----------------------------------------------------------------------===// // EmptyTensorListOp //===----------------------------------------------------------------------===// @@ -1552,7 +1606,7 @@ static ShapedType InferFillOpType(Value dims, Value value) { llvm::SmallVector shape; shape.reserve(dims_attr.getNumElements()); - for (const APInt &dim : dims_attr.getValues()) { + for (const APInt dim : dims_attr.getValues()) { shape.push_back(dim.getSExtValue()); } return RankedTensorType::get(shape, etype); @@ -1563,6 +1617,29 @@ void FillOp::build(OpBuilder &builder, OperationState &result, Value dims, FillOp::build(builder, result, InferFillOpType(dims, value), dims, value); } +OpFoldResult FillOp::fold(ArrayRef operands) { + assert(operands.size() == 2 && "fill op has two operand"); + + auto value = operands[1].dyn_cast_or_null(); + if (!value) return {}; + + auto type = getType().cast(); + if (type.hasStaticShape()) + return DenseElementsAttr::get(type, value.getValue({})); + + auto dims = operands[0].dyn_cast_or_null(); + if (!dims) return {}; + + llvm::SmallVector shape; + shape.reserve(dims.getNumElements()); + for (const APInt dim : dims.getValues()) { + shape.push_back(dim.getSExtValue()); + } + type = RankedTensorType::get(shape, type.getElementType()); + + return DenseElementsAttr::get(type, value.getValue({})); +} + //===----------------------------------------------------------------------===// // FusedBatchNormGradOp //===----------------------------------------------------------------------===// @@ -1719,14 +1796,14 @@ static LogicalResult Verify(IfOp op) { for (unsigned i = 0; i < expectedNumInputs; ++i) { auto operandType = op.getOperand(i + 1).getType().cast(); auto thenInputType = thenFuncType.getInput(i).cast(); - if (!AreCastCompatible(operandType, thenInputType)) + if (!AreCastCompatible({operandType, thenInputType})) return op.emitError( llvm::formatv("then branch input type {0} is incompatible with " "operand type {1} at index {2}", thenInputType, operandType, i)); auto elseInputType = elseFuncType.getInput(i).cast(); - if (!AreCastCompatible(operandType, elseInputType)) + if (!AreCastCompatible({operandType, elseInputType})) return op.emitError( llvm::formatv("else branch input type {0} is incompatible with " "operand type {1} at index {2}", @@ -1734,7 +1811,7 @@ static LogicalResult Verify(IfOp op) { // If branches have incompatible input types that means that no tensor can // serve as input to both the functions. Hence, the op is invalid. 
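To make the new list form of AreCastCompatible concrete, here is a hedged, illustrative sketch (the function names @then_unranked, @else_unranked and @if_cast_compatible are invented for this example): a ranked operand can feed branches declared on unranked tensors, because tensor<2xf32> and tensor<*xf32> are dynamically cast compatible, so the verifier accepts the op.

// Illustrative only: operand, branch and result types differ statically but are cast compatible.
func @then_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> {
  return %arg0 : tensor<*xf32>
}
func @else_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> {
  %0 = "tf.Neg"(%arg0) : (tensor<*xf32>) -> tensor<*xf32>
  return %0 : tensor<*xf32>
}
func @if_cast_compatible(%cond: tensor<i1>, %x: tensor<2xf32>) -> tensor<2xf32> {
  %0 = "tf.If"(%cond, %x) {then_branch = @then_unranked, else_branch = @else_unranked, is_stateless = true} : (tensor<i1>, tensor<2xf32>) -> tensor<2xf32>
  return %0 : tensor<2xf32>
}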
- if (!AreCastCompatible(thenInputType, elseInputType)) + if (!AreCastCompatible({thenInputType, elseInputType})) return op.emitError(llvm::formatv( "branches inputs have incompatible types {0} and {1} at index {2}", thenInputType, elseInputType, i)); @@ -1750,14 +1827,14 @@ static LogicalResult Verify(IfOp op) { for (unsigned i = 0; i < expectedNumResults; ++i) { auto resultType = op.getResult(i).getType().cast(); auto thenResultType = thenFuncType.getResult(i).cast(); - if (!AreCastCompatible(thenResultType, resultType)) + if (!AreCastCompatible({thenResultType, resultType})) return op.emitError( llvm::formatv("then branch result type {0} is incompatible with op " "result type {1} at index {2}", thenResultType, resultType, i)); auto elseResultType = elseFuncType.getResult(i).cast(); - if (!AreCastCompatible(elseResultType, resultType)) + if (!AreCastCompatible({elseResultType, resultType})) return op.emitError( llvm::formatv("else branch result type {0} is incompatible with op " "result type {1} at index {2}", @@ -1936,6 +2013,14 @@ LogicalResult MeanOp::FoldOperandsPermutation(ArrayRef permutation) { return success(); } +//===----------------------------------------------------------------------===// +// MulOp +//===----------------------------------------------------------------------===// + +OpFoldResult MulOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + //===----------------------------------------------------------------------===// // NegOp //===----------------------------------------------------------------------===// @@ -2904,6 +2989,10 @@ void SubOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +OpFoldResult SubOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + //===----------------------------------------------------------------------===// // SumOp //===----------------------------------------------------------------------===// @@ -3682,7 +3771,7 @@ static LogicalResult Verify(WhileOp op) { auto aType = a.second[idx]; auto bType = b.second[idx]; - if (!AreCastCompatible(aType, bType)) + if (!AreCastCompatible({aType, bType})) return op.emitError(llvm::formatv( "{0} type {1} is incompatible with {2} type {3} at index {4}", a.first, aType, b.first, bType, idx)); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 6efa26b3745..94b0c5f5e19 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -626,29 +626,6 @@ def TF_FusedBatchNormExOp : TF_Op<"_FusedBatchNormEx", [NoSideEffect]> { TF_DerivedOperandSizeAttr num_side_inputs = TF_DerivedOperandSizeAttr<5>; } -def TF_RecvTPUEmbeddingActivationsOp : TF_Op<"RecvTPUEmbeddingActivations", []> { - let summary = "An op that receives embedding activations on the TPU."; - - let description = [{ -The TPU system performs the embedding lookups and aggregations specified by -the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The -results of these aggregations are visible to the Tensorflow Graph as the -outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing -one Tensor of activations per table specified in the model. There can be at -most one RecvTPUEmbeddingActivations op in the TPU graph. 
- }]; - - let arguments = (ins - StrAttr:$config - ); - - let results = (outs - Variadic:$outputs - ); - - TF_DerivedResultSizeAttr num_outputs = TF_DerivedResultSizeAttr<0>; -} - // Multiple variadic operands with different sizes are not supported by the // dialect generator, so we manually added the op. def TF_SendTPUEmbeddingGradientsOp : TF_Op<"SendTPUEmbeddingGradients", [AttrSizedOperandSegments]> { @@ -680,6 +657,65 @@ config: Serialized TPUEmbeddingConfiguration proto. TF_DerivedOperandSizeAttr NN = TF_DerivedOperandSizeAttr<1>; } +// Multiple variadic operands with different sizes are not supported by the +// dialect generator, so we manually added the op. +def TF__SendTPUEmbeddingGradientsOp : TF_Op<"_SendTPUEmbeddingGradients", [AttrSizedOperandSegments]> { + let summary = "Performs gradient updates of embedding tables."; + + let description = [{ +The gradients argument is a TensorList having the same length and shapes as the +return value of _RecvTPUEmbeddingActivations, but contains gradients of the +model's loss with respect to the embedding activations. The embedding tables are +updated from these gradients via the optimizer specified in the +TPUEmbeddingConfiguration proto given to tpu.initialize_system. + +gradients: A TensorList of gradients with which to update embedding tables. +learning_rates: A TensorList of learning rates used for updating the embedding + tables via the optimizer. The length of the TensorList must be equal to the + number of dynamic learning rate tags specified in the + TPUEmbeddingConfiguration proto. +deduplication_data: A Tensor with type=DT_VARIANT containing the deduplication + data. The tensor is an XLA nested tuple containing N elements. Each + element of the nested tuple is a tuple of rank 1 tensors. Each tensor either + contains indices (DT_INT32) for embedding lookup or weights (DT_FLOAT) to + apply to the output of the embedding lookup operation. +config: Serialized TPUEmbeddingConfiguration proto. + }]; + + let arguments = (ins + Variadic:$gradients, + Variadic:$learning_rates, + TF_VariantTensor:$deduplication_data, + StrAttr:$config + ); + + TF_DerivedOperandSizeAttr NumTables = TF_DerivedOperandSizeAttr<0>; + TF_DerivedOperandSizeAttr NumLearningRateTags = TF_DerivedOperandSizeAttr<1>; +} + +// Updated the op description text from the auto-generated op definition. +def TF__RecvTPUEmbeddingDeduplicationDataOp : TF_Op<"_RecvTPUEmbeddingDeduplicationData", []> { + let summary = [{ +Receives deduplication data (indices and weights). + }]; + + let description = [{ +The deduplication data is a Tensor with type=DT_VARIANT. The tensor itself is an +XLA nested tuple containing N elements. Each element of the nested tuple is a +tuple of rank 1 tensors. Each tensor either contains indices (DT_INT32) for +embedding lookup or weights (DT_FLOAT) to apply to the output of the embedding +lookup operation. + }]; + + let arguments = (ins + StrAttr:$config + ); + + let results = (outs + TF_VariantTensor:$output + ); +} + def TF_XlaShardingOp : TF_Op<"XlaSharding", [NoSideEffect]> { let summary = [{ An op which shards the input based on the given sharding attribute. 
@@ -741,4 +777,157 @@ Formats a string template using a list of tensors, pretty-printing tensor summar TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; } +//===----------------------------------------------------------------------===// +// tf.data ops +//===----------------------------------------------------------------------===// + +def TF_BatchDatasetV2Op : TF_Op<"BatchDatasetV2", [NoSideEffect]> { + let summary = [{ +Creates a dataset that batches `batch_size` elements from `input_dataset`. + }]; + + let description = [{ + }]; + + let arguments = (ins + TF_VariantTensor:$input_dataset, + I64Tensor:$batch_size, + I1Tensor:$drop_remainder, + + DefaultValuedAttr:$parallel_copy, + Confined]>:$output_types, + Confined]>:$output_shapes + ); + + let results = (outs + TF_VariantTensor:$handle + ); +} + +def TF_MapDatasetOp : TF_Op<"MapDataset", [NoSideEffect]> { + let summary = [{ + Creates a dataset that applies `f` to the outputs of `input_dataset`. + }]; + + let arguments = (ins + TF_VariantTensor:$input_dataset, + Variadic:$other_arguments, + + SymbolRefAttr:$f, + Confined]>:$output_types, + Confined]>:$output_shapes, + DefaultValuedAttr:$use_inter_op_parallelism, + DefaultValuedAttr:$preserve_cardinality + ); + + let results = (outs + TF_VariantTensor:$handle + ); + + TF_DerivedOperandTypeListAttr Targuments = TF_DerivedOperandTypeListAttr<1>; +} + +def TF_MapAndBatchDatasetOp : TF_Op<"MapAndBatchDataset", [NoSideEffect]> { + let summary = "Creates a dataset that fuses mapping with batching."; + + let description = [{ +Creates a dataset that applies `f` to the outputs of `input_dataset` and then +batches `batch_size` of them. + +Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes up +to `batch_size * num_parallel_batches` copies of `f` in parallel. + }]; + + let arguments = (ins + TF_VariantTensor:$input_dataset, + Variadic:$other_arguments, + I64Tensor:$batch_size, + I64Tensor:$num_parallel_calls, + I1Tensor:$drop_remainder, + + SymbolRefAttr:$f, + Confined]>:$output_types, + Confined]>:$output_shapes, + DefaultValuedAttr:$preserve_cardinality + ); + + let results = (outs + TF_VariantTensor:$handle + ); + + TF_DerivedOperandTypeListAttr Targuments = TF_DerivedOperandTypeListAttr<1>; +} + +def TF_ParallelMapDatasetOp : TF_Op<"ParallelMapDataset", [NoSideEffect]> { + let summary = [{ + Creates a dataset that applies `f` to the outputs of `input_dataset`. + }]; + + let description = [{ + Unlike a "MapDataset", which applies `f` sequentially, this dataset invokes + up to `num_parallel_calls` copies of `f` in parallel. + }]; + + let arguments = (ins + TF_VariantTensor:$input_dataset, + Variadic:$other_arguments, + I32Tensor:$num_parallel_calls, + + SymbolRefAttr:$f, + Confined]>:$output_types, + Confined]>:$output_shapes, + DefaultValuedAttr:$use_inter_op_parallelism, + DefaultValuedAttr:$sloppy, + DefaultValuedAttr:$preserve_cardinality + ); + + let results = (outs + TF_VariantTensor:$handle + ); + + TF_DerivedOperandTypeListAttr Targuments = TF_DerivedOperandTypeListAttr<1>; +} + +def TF_TensorSliceDatasetOp : TF_Op<"TensorSliceDataset", []> { + let summary = [{ + Creates a dataset that emits each dim-0 slice of `components` once. 
+ }]; + + let arguments = (ins + Variadic:$components, + Confined]>:$output_shapes + ); + + let results = (outs + TF_VariantTensor:$handle + ); + + TF_DerivedOperandTypeListAttr Toutput_types = TF_DerivedOperandTypeListAttr<0>; +} + +// TODO(b/156507832): Move tf.InplaceUpdate to tf_generated_ops.td once +// autogenerated op def matches. +def TF_InplaceUpdateOp : TF_Op<"InplaceUpdate", [NoSideEffect]> { + let summary = "Updates specified rows 'i' with values 'v'."; + + let description = [{ +Computes `x[i, :] = v; return x`. + +Originally this function is mutative however for compilation we make this +operation create / operate on a copy of `x`. + }]; + + let arguments = (ins + TF_Tensor:$x, + I32Tensor:$i, + TF_Tensor:$v + ); + + let results = (outs + TF_Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + #endif // TF_OPS diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc index 6c3cd7fac92..d312e5e409b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc @@ -28,6 +28,134 @@ llvm::Optional> GetShape(mlir::Value value) { if (shaped_type.hasRank()) return shaped_type.getShape(); return llvm::None; } + +// Merges cast compatible shapes and returns a more refined shape. The two +// shapes are cast compatible if they have the same rank and at each dimension, +// either both have same size or one of them is dynamic. Returns false if the +// given shapes are not cast compatible. The refined shape is same or more +// precise than the two input shapes. +bool GetCastCompatibleShape(llvm::ArrayRef a_shape, + llvm::ArrayRef b_shape, + llvm::SmallVectorImpl* refined_shape) { + if (a_shape.size() != b_shape.size()) return false; + int64_t rank = a_shape.size(); + refined_shape->reserve(rank); + for (auto dims : llvm::zip(a_shape, b_shape)) { + int64_t dim1 = std::get<0>(dims); + int64_t dim2 = std::get<1>(dims); + + if (mlir::ShapedType::isDynamic(dim1)) { + refined_shape->push_back(dim2); + continue; + } + if (mlir::ShapedType::isDynamic(dim2)) { + refined_shape->push_back(dim1); + continue; + } + if (dim1 == dim2) { + refined_shape->push_back(dim1); + continue; + } + return false; + } + return true; +} + +// Given two types `a` and `b`, returns a refined type which is cast compatible +// with both `a` and `b` and is equal to or more precise than both of them. It +// returns empty Type if the input types are not cast compatible. +// +// The two types are considered cast compatible if they have dynamically equal +// shapes and element type. For element types that do not have subtypes, they +// must be equal. However for TensorFlow types such as Resource and Variant, +// that also have subtypes, we recursively check for subtype compatibilty for +// Resource types and assume all variant types are cast compatible. If either +// one of `a` or `b` have empty subtypes, they are considered cast compatible. +// +// The returned type is same or more precise than the input types. For example, +// if `a` and `b` are cast compatible types tensor<2x?x?xf32> and +// tensor respectively, the returned type is tensor<2x4x?xf32>. +// +// Provides option to ignore ref types on 'a'. This is useful for TF ops that +// might allow operands to either be same as result type or be a ref type +// corresponding to it. +mlir::Type GetCastCompatibleType(mlir::Type a, mlir::Type b, + bool may_ignore_ref_type_a) { + // Fast path if everything is equal. 
+ if (a == b) return b; + + auto a_tt = a.dyn_cast(); + auto b_tt = b.dyn_cast(); + + // If only one of a or b is a tensor type, they are incompatible. + if (static_cast(a_tt) ^ static_cast(b_tt)) return nullptr; + + // For non-tensor types, we do not need to worry about shape and can return + // early. + if (!a_tt && !b_tt) { + // Remove ref types. + if (may_ignore_ref_type_a) { + if (auto ref_type = a.dyn_cast()) { + a = ref_type.RemoveRef(); + if (a == b) return a; + } + } + if (a.getKind() != b.getKind()) return nullptr; + + // If either is not a type that contain subtypes then the types are not cast + // compatible. + auto a_wst = a.dyn_cast(); + auto b_wst = b.dyn_cast(); + if (!a_wst || !b_wst) return nullptr; + + // For Variant types we are more permissive right now and accept all pairs + // of Variant types. If we are more constrainted and check compatibility of + // subtypes, we might reject valid graphs. + // TODO(prakalps): Variant doesn't have a subtype, we assign it + // one, so we should only assign it one when we know the subtype. Then we + // can be more constrained and check subtypes for cast compatibility as + // well. + if (a.isa()) return a; + + // For Resource types, we recursively check the subtypes for cast + // compatibility, if possible. Otherwise treat them as compatible. + auto a_wst_st = a_wst.GetSubtypes(); + auto b_wst_st = b_wst.GetSubtypes(); + if (a_wst_st.empty() || b_wst_st.empty()) return a; + if (a_wst_st.size() != b_wst_st.size()) return nullptr; + llvm::SmallVector refined_subtypes; + for (auto subtypes : llvm::zip(a_wst_st, b_wst_st)) { + mlir::Type refined_st = + GetCastCompatibleType(std::get<0>(subtypes), std::get<1>(subtypes), + /*may_ignore_ref_type_a=*/false); + if (!refined_st) return nullptr; + refined_subtypes.push_back(refined_st.cast()); + } + + return mlir::TF::ResourceType::get(refined_subtypes, a.getContext()); + } + + // For tensor types, check compatibility of both element type and shape. + mlir::Type refined_element_ty = GetCastCompatibleType( + a_tt.getElementType(), b_tt.getElementType(), may_ignore_ref_type_a); + if (!refined_element_ty) return nullptr; + + if (!a_tt.hasRank() && !b_tt.hasRank()) { + return mlir::UnrankedTensorType::get(refined_element_ty); + } + if (!a_tt.hasRank()) { + return mlir::RankedTensorType::get(b_tt.getShape(), refined_element_ty); + } + if (!b_tt.hasRank()) { + return mlir::RankedTensorType::get(a_tt.getShape(), refined_element_ty); + } + + llvm::SmallVector refined_shape; + if (!GetCastCompatibleShape(a_tt.getShape(), b_tt.getShape(), &refined_shape)) + return nullptr; + + return mlir::RankedTensorType::get(refined_shape, refined_element_ty); +} } // namespace namespace mlir { @@ -224,44 +352,16 @@ bool BroadcastCompatible(ArrayRef lhs, ArrayRef rhs) { bool HasCompatibleElementTypes(Type lhs, Type rhs, bool may_ignore_ref_type_lhs) { - // Fast path if everything is equal. - if (lhs == rhs) return true; + return GetCastCompatibleType(lhs, rhs, may_ignore_ref_type_lhs) != nullptr; +} - // In TF all values are tensors. - auto lhs_tt = lhs.cast(); - auto rhs_tt = rhs.cast(); - - // Verify matching element types. These should be identical dynamically, - // so this allows for types not yet fully refined. - auto lhs_et = lhs_tt.getElementType(); - auto rhs_et = rhs_tt.getElementType(); - if (lhs_et == rhs_et) return true; - - // Remove ref types. 
- if (may_ignore_ref_type_lhs) { - if (auto ref_type = lhs_et.dyn_cast()) { - lhs_et = ref_type.RemoveRef(); - if (lhs_et == rhs_et) return true; - } - } - - if (lhs_et.getKind() != rhs_et.getKind()) return false; - - // If either is not type that contain subtypes then the element types don't - // match. - auto lhs_wst = lhs_et.dyn_cast(); - auto rhs_wst = rhs_et.dyn_cast(); - if (!lhs_wst || !rhs_wst) return false; - - // Consider the subtype recursively. - auto lhs_wst_st = lhs_wst.GetSubtypes(); - auto rhs_wst_st = rhs_wst.GetSubtypes(); - if (lhs_wst_st.empty() || rhs_wst_st.empty()) return true; - if (lhs_wst_st.size() != rhs_wst_st.size()) return false; - for (auto subtypes : llvm::zip(lhs_wst_st, rhs_wst_st)) { - if (!HasCompatibleElementTypes(std::get<0>(subtypes), - std::get<1>(subtypes))) - return false; +bool AreCastCompatible(ArrayRef types) { + Type common = types.front(); + for (auto type : types.drop_front()) { + Type refined_type = + GetCastCompatibleType(common, type, /*may_ignore_ref_type_a=*/false); + if (!refined_type) return false; + common = refined_type; } return true; } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h index d1e6a74a0c5..4c99aae4706 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h @@ -313,6 +313,12 @@ bool BroadcastCompatible(ArrayRef lhs, ArrayRef rhs); bool HasCompatibleElementTypes(Type lhs, Type rhs, bool may_ignore_ref_type_lhs = false); +// Returns true if all TensorFlow types can be cast to one +// another. In other words, a single run-time value is legal for both the types. +// For example, tensor<*xf32>, tensor and tensor<3xf32> are cast +// compatible. +bool AreCastCompatible(ArrayRef types); + } // end namespace TF } // end namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/annotate-parameter-replication.mlir b/tensorflow/compiler/mlir/tensorflow/tests/annotate-parameter-replication.mlir index 0111d4e4a89..743f0b43b69 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/annotate-parameter-replication.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/annotate-parameter-replication.mlir @@ -10,18 +10,18 @@ module attributes {tf.versions = {producer = 888 : i32}} { %5:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf._F"(%arg0) : (tensor) -> tensor %3 = "tf.Identity"(%1) : (tensor) -> tensor - %4 = "tf_device.launch_func"(%ri_0, %3, %2) {func = @tpu0_func, device = ""} : (tensor, tensor, tensor) -> tensor + %4 = "tf_device.cluster_func"(%ri_0, %3, %2) {func = @_func, device = ""} : (tensor, tensor, tensor) -> tensor tf_device.return %4 : tensor } %6 = "tf._C"(%5#1) : (tensor) -> tensor return %6 : tensor } - // CHECK-LABEL: func @tpu0_func + // CHECK-LABEL: func @_func // CHECK-SAME: %[[ARG0:.*]]: tensor, // CHECK-SAME: %[[ARG1:.*]]: tensor {tf_device.is_same_data_across_replicas = true} // CHECK-SAME: %[[ARG2:.*]]: tensor) - func @tpu0_func(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + func @_func(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { %0 = "tf._D"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0 : tensor } @@ -46,18 +46,18 @@ module attributes {tf.versions = {producer = 888 : i32}} { [%arg4, %arg5] as %ri_2: tensor>>) {_mirrored_variable_indices = [0, 2], n = 2 : i32} { %0 = "tf.ReadVariableOp"(%ri_0): (tensor>>) -> tensor %1 = "tf.ReadVariableOp"(%ri_1): (tensor>>) -> tensor - %2 = "tf_device.launch_func"(%0, %1, %ri_2) 
{func = @tpu0_func, device = ""} : (tensor, tensor, tensor>>) -> tensor + %2 = "tf_device.cluster_func"(%0, %1, %ri_2) {func = @_func, device = ""} : (tensor, tensor, tensor>>) -> tensor tf_device.return %2 : tensor } %4 = "tf._C"(%3#1) : (tensor) -> tensor return %4 : tensor } - // CHECK-LABEL: func @tpu0_func + // CHECK-LABEL: func @_func // CHECK-SAME: %[[ARG0:.*]]: tensor {tf_device.is_same_data_across_replicas = true}, // CHECK-SAME: %[[ARG1:.*]]: tensor, // CHECK-SAME: %[[ARG2:.*]]: tensor>> {tf_device.is_same_data_across_replicas = true} - func @tpu0_func(%arg0: tensor, %arg1: tensor, %arg2: tensor>>) -> tensor { + func @_func(%arg0: tensor, %arg1: tensor, %arg2: tensor>>) -> tensor { %0 = "tf._D"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0 : tensor } @@ -65,21 +65,21 @@ module attributes {tf.versions = {producer = 888 : i32}} { // ----- -// Tests that a non-replicated LaunchFuncOp is not annotated. +// Tests that a non-replicated ClusterFuncOp is not annotated. module attributes {tf.versions = {producer = 888 : i32}} { // CHECK-LABEL: func @do_not_annotate_without_replicate func @do_not_annotate_without_replicate(%arg0: tensor) -> tensor { %0 = "tf._A"(%arg0) : (tensor) -> tensor %1 = "tf._B"(%arg0) : (tensor) -> tensor - %2 = "tf_device.launch_func"(%0, %1) {func = @tpu0_func, device = ""} : (tensor, tensor) -> tensor + %2 = "tf_device.cluster_func"(%0, %1) {func = @_func, device = ""} : (tensor, tensor) -> tensor %3 = "tf._C"(%2) : (tensor) -> tensor return %3 : tensor } - // CHECK-LABEL: func @tpu0_func + // CHECK-LABEL: func @_func // CHECK-NOT: tf_device.is_same_data_across_replicas - func @tpu0_func(%arg0: tensor, %arg1: tensor) -> tensor { + func @_func(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tf._D"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0 : tensor } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 18f8d5f4486..e05894dc266 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -471,3 +471,14 @@ func @testRankOfRankedTensor(%arg0 : tensor<4x3x2xf32>) -> tensor { // CHECK: return [[VAL0]] return %0 : tensor } + +// CHECK-LABEL: @foldFill +func @foldFill() -> (tensor<3x2x1xf32>, tensor<*xf32>) { + %0 = "tf.Const"() {value = dense<[3, 2, 1]> : tensor<3xi32>} : () -> tensor<3xi32> + %1 = "tf.Const"() {value = dense<23.0> : tensor} : () -> tensor + // CHECK: "tf.Const"() {value = dense<2.300000e+01> : tensor<3x2x1xf32>} + %2 = "tf.Fill"(%0, %1) : (tensor<3xi32>, tensor) -> tensor<3x2x1xf32> + // CHECK: "tf.Const"() {value = dense<2.300000e+01> : tensor<3x2x1xf32>} + %3 = "tf.Fill"(%0, %1) : (tensor<3xi32>, tensor) -> tensor<*xf32> + return %2, %3 : tensor<3x2x1xf32>, tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir index 1866879c465..42ed55deeda 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/cluster_outlining.mlir @@ -1,127 +1,120 @@ -// RUN: tf-opt %s -split-input-file -tf-device-cluster-outlining | FileCheck %s +// RUN: tf-opt %s -split-input-file -tf-device-cluster-outlining | FileCheck %s -dump-input-on-failure -// Tests simple case of a single `tf_device.launch`. +// Tests simple case of a single `tf_device.cluster`. 
-module { - // CHECK-LABEL: func @multiplelaunches - // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) - func @multiplelaunches(%arg0: tensor) -> tensor { - %0 = tf_executor.graph { - %1:2 = tf_executor.island { - // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) - %2 = "tf.A"(%arg0) : (tensor) -> tensor +// CHECK-LABEL: func @single_cluster +// CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) +func @single_cluster(%arg0: tensor) -> tensor { + %0 = tf_executor.graph { + %1:2 = tf_executor.island { + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) + %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) {device = "tpu0", func = @tpu0_func} - %3 = "tf_device.launch"() ( { - %4 = "tf.B"(%2) : (tensor) -> tensor - tf_device.return %4 : tensor - }) {device = "tpu0"} : () -> tensor + // CHECK: %[[CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[A_OUTPUT]]) {func = @[[CLUSTER:.*]]} + %3 = "tf_device.cluster"() ( { + %4 = "tf.B"(%2) : (tensor) -> tensor + tf_device.return %4 : tensor + }) {} : () -> tensor - // CHECK: tf_executor.yield %[[C_OUTPUT]] - tf_executor.yield %3 : tensor - } - tf_executor.fetch %1#0 : tensor + // CHECK: tf_executor.yield %[[CLUSTER_OUTPUT]] + tf_executor.yield %3 : tensor } - return %0 : tensor + tf_executor.fetch %1#0 : tensor } - -// CHECK-LABEL: func @tpu0_func -// CHECK-SAME: (%[[TPU0_FUNC_ARG_0:[a-z0-9]*]]: tensor) -> tensor -// CHECK-SAME: sym_visibility = "private" -// CHECK: %[[TPU0_FUNC_B_OUTPUT:[0-9]*]] = "tf.B"(%[[TPU0_FUNC_ARG_0]]) -// CHECK: return %[[TPU0_FUNC_B_OUTPUT]] + return %0 : tensor } +// CHECK: func @[[CLUSTER]] +// CHECK-SAME: (%[[CLUSTER_ARG_0:[a-z0-9]*]]: tensor) -> tensor +// CHECK-SAME: sym_visibility = "private" +// CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[CLUSTER_ARG_0]]) +// CHECK: return %[[B_OUTPUT]] + // ----- -// Tests that multiple `tf_device.launch` that depend on each other are +// Tests that multiple `tf_device.cluster` that depend on each other are // correctly handled. 
-module { - // CHECK-LABEL: func @multiplelaunches - // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) - func @multiplelaunches(%arg0: tensor) -> tensor { - %0 = tf_executor.graph { - %1:2 = tf_executor.island { - // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) - %2 = "tf.A"(%arg0) : (tensor) -> tensor +// CHECK-LABEL: func @multiple_clusters +// CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) +func @multiple_clusters(%arg0: tensor) -> tensor { + %0 = tf_executor.graph { + %1:2 = tf_executor.island { + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"(%[[ARG_0]]) + %2 = "tf.A"(%arg0) : (tensor) -> tensor - // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[A_OUTPUT]]) {device = "tpu0", func = @tpu0_func} - %3 = "tf_device.launch"() ( { - %6 = "tf.B"(%2) : (tensor) -> tensor - tf_device.return %6 : tensor - }) {device = "tpu0"} : () -> tensor + // CHECK: %[[CLUSTER_0_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[A_OUTPUT]]) {func = @[[CLUSTER_0:.*]]} + %3 = "tf_device.cluster"() ( { + %6 = "tf.B"(%2) : (tensor) -> tensor + tf_device.return %6 : tensor + }) {} : () -> tensor - // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[C_OUTPUT]]) - %4 = "tf.D"(%3) : (tensor) -> tensor + // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[CLUSTER_0_OUTPUT]]) + %4 = "tf.D"(%3) : (tensor) -> tensor - // CHECK: %[[E_OUTPUT:[0-9]*]] = "tf_device.launch_func"(%[[C_OUTPUT]], %[[D_OUTPUT]]) {device = "gpu0", func = @gpu0_func} - %5 = "tf_device.launch"() ( { - %6 = "tf.E"(%3) : (tensor) -> tensor - %7 = "tf.F"(%4, %6) : (tensor, tensor) -> tensor - tf_device.return %7 : tensor - }) {device = "gpu0"} : () -> tensor + // CHECK: %[[CLUSTER_1_OUTPUT:[0-9]*]] = "tf_device.cluster_func"(%[[CLUSTER_0_OUTPUT]], %[[D_OUTPUT]]) {func = @[[CLUSTER_1:.*]]} + %5 = "tf_device.cluster"() ( { + %6 = "tf.E"(%3) : (tensor) -> tensor + %7 = "tf.F"(%4, %6) : (tensor, tensor) -> tensor + tf_device.return %7 : tensor + }) {} : () -> tensor - // CHECK: tf_executor.yield %[[E_OUTPUT]] - tf_executor.yield %5 : tensor - } - tf_executor.fetch %1#0 : tensor + // CHECK: tf_executor.yield %[[CLUSTER_1_OUTPUT]] + tf_executor.yield %5 : tensor } - return %0 : tensor + tf_executor.fetch %1#0 : tensor } - -// CHECK-LABEL: func @tpu0_func -// CHECK-SAME: (%[[TPU0_FUNC_ARG_0:[a-z0-9]*]]: tensor) -> tensor -// CHECK: %[[TPU0_FUNC_B_OUTPUT:[0-9]*]] = "tf.B"(%[[TPU0_FUNC_ARG_0]]) -// CHECK: return %[[TPU0_FUNC_B_OUTPUT]] - -// CHECK-LABEL: func @gpu0_func -// CHECK-SAME: (%[[GPU0_FUNC_ARG_0:[a-z0-9]*]]: tensor, %[[GPU0_FUNC_ARG_1:[a-z0-9]*]]: tensor) -> tensor -// CHECK: %[[GPU0_FUNC_E_OUTPUT:[0-9]*]] = "tf.E"(%[[GPU0_FUNC_ARG_0]]) -// CHECK: %[[GPU0_FUNC_F_OUTPUT:[0-9]*]] = "tf.F"(%[[GPU0_FUNC_ARG_1]], %[[GPU0_FUNC_E_OUTPUT]]) -// CHECK: return %[[GPU0_FUNC_F_OUTPUT]] + return %0 : tensor } +// CHECK: func @[[CLUSTER_0]] +// CHECK-SAME: (%[[CLUSTER_0_ARG_0:[a-z0-9]*]]: tensor) -> tensor +// CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[CLUSTER_0_ARG_0]]) +// CHECK: return %[[B_OUTPUT]] + +// CHECK: func @[[CLUSTER_1]] +// CHECK-SAME: (%[[CLUSTER_1_ARG_0:[a-z0-9]*]]: tensor, %[[CLUSTER_1_ARG_1:[a-z0-9]*]]: tensor) -> tensor +// CHECK: %[[E_OUTPUT:[0-9]*]] = "tf.E"(%[[CLUSTER_1_ARG_0]]) +// CHECK: %[[F_OUTPUT:[0-9]*]] = "tf.F"(%[[CLUSTER_1_ARG_1]], %[[E_OUTPUT]]) +// CHECK: return %[[F_OUTPUT]] + // ----- -// Tests outlining launches with no live-in values. +// Tests outlining clusters with no live-in values. 
-module { - // CHECK-LABEL: func @multiplelaunches - // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) - func @multiplelaunches(%arg0: tensor) -> tensor { - %0 = tf_executor.graph { - %1:2 = tf_executor.island wraps - // CHECK: %[[A_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.launch_func"() {device = "tpu0", func = @tpu0_func} - "tf_device.launch"() ( { - %3 = "tf.A"() : () -> tensor - tf_device.return %3 : tensor - }) {device = "tpu0"} : () -> tensor - // CHECK: tf_executor.fetch %[[A_OUTPUT]] - tf_executor.fetch %1#0 : tensor - } - return %0 : tensor +// CHECK-LABEL: func @cluster_operands +// CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) +func @cluster_operands(%arg0: tensor) -> tensor { + %0 = tf_executor.graph { + %1:2 = tf_executor.island wraps + // CHECK: %[[CLUSTER_OUTPUT:[a-z0-9]*]], %{{.*}} = {{.*}} "tf_device.cluster_func"() {func = @[[CLUSTER:.*]]} + "tf_device.cluster"() ( { + %3 = "tf.A"() : () -> tensor + tf_device.return %3 : tensor + }) {} : () -> tensor + // CHECK: tf_executor.fetch %[[CLUSTER_OUTPUT]] + tf_executor.fetch %1#0 : tensor } + return %0 : tensor +} -// CHECK-LABEL: func @tpu0_func +// CHECK: func @[[CLUSTER]] // CHECK-SAME: () -> tensor -// CHECK: %[[TPU0_FUNC_A_OUTPUT:[0-9]*]] = "tf.A"() -// CHECK: return %[[TPU0_FUNC_A_OUTPUT]] -} +// CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A"() +// CHECK: return %[[A_OUTPUT]] // ----- -// Tests launch attributes are copied over to launch_func. +// Tests cluster attributes are copied over to cluster_func. -module { - // CHECK-LABEL: func @launch_attrs - func @launch_attrs() -> tensor { - %0 = "tf_device.launch"() ( { - %1 = "tf.A"() : () -> tensor - tf_device.return %1 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - return %0 : tensor - } - -// CHECK: launch_attr = "launch_attr" +// CHECK-LABEL: func @cluster_attrs +func @cluster_attrs() -> tensor { + %0 = "tf_device.cluster"() ( { + %1 = "tf.A"() : () -> tensor + tf_device.return %1 : tensor + }) {cluster_attr = "cluster_attr"} : () -> tensor + return %0 : tensor } + +// CHECK: "tf_device.cluster_func" +// CHECK-SAME: cluster_attr = "cluster_attr" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir index 2a34bbfacdc..bccb8923134 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir @@ -38,6 +38,56 @@ func @testPow(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> (tensor<4xf32>, ten return %0, %1, %2 : tensor<4xf32>, tensor<4xf32>, tensor<4xf32> } +// CHECK-LABEL: func @testEmpty32 +func @testEmpty32() -> (tensor<5xi32>) { + %0 = "tf.Const"() { value = dense<5> : tensor } : () -> tensor + + // CHECK: [[VAL:%.+]] = "tf.Const"() {value = dense<0> : tensor<5xi32>} + // CHECK: return [[VAL]] + %1 = "tf.Empty"(%0) : (tensor) -> (tensor<5xi32>) + return %1 : tensor<5xi32> +} + +// CHECK-LABEL: func @testEmpty64 +func @testEmpty64() -> (tensor<5xi64>) { + %0 = "tf.Const"() { value = dense<5> : tensor } : () -> tensor + + // CHECK: [[VAL:%.+]] = "tf.Const"() {value = dense<0> : tensor<5xi64>} + // CHECK: return [[VAL]] : tensor<5xi64> + %1 = "tf.Empty"(%0) : (tensor) -> (tensor<5xi64>) + return %1 : tensor<5xi64> +} + +// CHECK-LABEL: func @testEmptyFloat +func @testEmptyFloat() -> (tensor<5xf64>) { + %0 = "tf.Const"() { value = dense<5> : tensor } : () -> tensor + + // CHECK: [[VAL:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<5xf64>} + // CHECK: return [[VAL]] + %1 = 
"tf.Empty"(%0) : (tensor) -> (tensor<5xf64>) + return %1 : tensor<5xf64> +} + +// CHECK-LABEL: func @testEmptyf16 +func @testEmptyf16() -> (tensor<5xf16>) { + %0 = "tf.Const"() { value = dense<5> : tensor } : () -> tensor + + // CHECK: [[VAL:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<5xf16>} + // CHECK: return [[VAL]] + %1 = "tf.Empty"(%0) : (tensor) -> (tensor<5xf16>) + return %1 : tensor<5xf16> +} + +// CHECK-LABEL: func @testEmptybf16 +func @testEmptybf16() -> (tensor<5xbf16>) { + %0 = "tf.Const"() { value = dense<5> : tensor } : () -> tensor + + // CHECK: [[VAL:%.+]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<5xbf16>} + // CHECK: return [[VAL]] + %1 = "tf.Empty"(%0) : (tensor) -> (tensor<5xbf16>) + return %1 : tensor<5xbf16> +} + // CHECK-LABEL: func @testShapeN func @testShapeN(%arg0: tensor, %arg1: tensor<1x32x32x16xf32>, %arg2: tensor<*xf32>) -> (tensor<0xi64>, tensor<4xi64>, tensor<4xi64>, tensor) { @@ -251,3 +301,138 @@ func @testTensorListElementShape(%arg0: tensor>>) -> // CHECK-NEXT: return [[cst]] : tensor<2xi32> return %0: tensor<2xi32> } + +func @RemoveTrivialAdd(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<0.0> : tensor<2x2xf32> + %0 = "tf.Add"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tf.Add"(%0, %cst) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: RemoveTrivialAdd + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK-NEXT: return %[[RESULT]] : tensor<2x2xf32> +} + +func @RemoveTrivialAddBf16RHS(%arg0: tensor<2x2xbf16>) -> tensor<2x2xbf16> { + %cst = constant dense<0.0> : tensor<2x2xbf16> + %0 = "tf.Add"(%arg0, %cst) : (tensor<2x2xbf16>, tensor<2x2xbf16>) -> tensor<2x2xbf16> + return %0 : tensor<2x2xbf16> + + // CHECK-LABEL: RemoveTrivialAdd + // CHECK-NEXT: return %arg0 : tensor<2x2xbf16> +} + +func @RemoveTrivialAddBf16LHS(%arg0: tensor<2x2xbf16>) -> tensor<2x2xbf16> { + %cst = constant dense<0.0> : tensor<2x2xbf16> + %0 = "tf.Add"(%cst, %arg0) : (tensor<2x2xbf16>, tensor<2x2xbf16>) -> tensor<2x2xbf16> + return %0 : tensor<2x2xbf16> + + // CHECK-LABEL: RemoveTrivialAdd + // CHECK-NEXT: return %arg0 : tensor<2x2xbf16> +} + +func @RemoveTrivialAddV2(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<0.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tf.AddV2"(%0, %cst) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: RemoveTrivialAddV2 + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK-NEXT: return %[[RESULT]] : tensor<2x2xf32> +} + +func @RemoveTrivialSub(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<0.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tf.Sub"(%0, %cst) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: RemoveTrivialSub + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK-NEXT: return %[[RESULT]] : tensor<2x2xf32> +} + +func @RemoveTrivialSubInt8(%arg0: tensor<2x2xi8>) -> tensor<2x2xi8> { + %cst = constant dense<0> : tensor<2x2xi8> + %0 = "tf.Sub"(%arg0, %cst) 
: (tensor<2x2xi8>, tensor<2x2xi8>) -> tensor<2x2xi8> + return %0 : tensor<2x2xi8> + + // CHECK-LABEL: RemoveTrivialSubInt8 + // CHECK-NEXT: return %arg0 : tensor<2x2xi8> +} + +func @RemoveTrivialMul(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<1.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tf.Mul"(%0, %cst) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: RemoveTrivialMul + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK-NEXT: return %[[RESULT]] : tensor<2x2xf32> +} + +func @RemoveTrivialDiv(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<1.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + %1 = "tf.Div"(%0, %cst) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: RemoveTrivialDiv + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK-NEXT: return %[[RESULT]] : tensor<2x2xf32> +} + +func @RemoveTrivialDivBf16RHS(%arg0: tensor<2x2xbf16>) -> tensor<2x2xbf16> { + %cst = constant dense<1.0> : tensor<2x2xbf16> + %0 = "tf.Div"(%arg0, %cst) : (tensor<2x2xbf16>, tensor<2x2xbf16>) -> tensor<2x2xbf16> + return %0 : tensor<2x2xbf16> + + // CHECK-LABEL: RemoveTrivialDiv + // CHECK-NEXT: return %arg0 : tensor<2x2xbf16> +} + +func @RemoveTrivialMulInt8(%arg0: tensor<2x2xi8>) -> tensor<2x2xi8> { + %cst = constant dense<1> : tensor<2x2xi8> + %0 = "tf.Mul"(%cst, %arg0) : (tensor<2x2xi8>, tensor<2x2xi8>) -> tensor<2x2xi8> + return %0 : tensor<2x2xi8> + + // CHECK-LABEL: RemoveTrivialMulInt8 + // CHECK-NEXT: return %arg0 : tensor<2x2xi8> +} + +func @DivBf16LHS(%arg0: tensor<2x2xbf16>) -> tensor<2x2xbf16> { + %cst = constant dense<1.0> : tensor<2x2xbf16> + %0 = "tf.Div"(%cst, %arg0) : (tensor<2x2xbf16>, tensor<2x2xbf16>) -> tensor<2x2xbf16> + return %0 : tensor<2x2xbf16> + + // CHECK-LABEL: DivBf16LHS + // CHECK: tf.Div +} + +func @DontRemoveTrivialAdd(%arg0: tensor<1x2xf32>, %arg1: tensor<1x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<0.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<1x2xf32> + %1 = "tf.AddV2"(%0, %cst) : (tensor<1x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> + + // CHECK-LABEL: DontRemoveTrivialAdd + // CHECK: %[[CONST:.*]] = constant dense<0.000000e+00> : tensor<2x2xf32> + // CHECK: %[[add:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor<1x2xf32>, tensor<1x2xf32>) -> tensor<1x2xf32> + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%[[add]], %[[CONST]]) : (tensor<1x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + // CHECK: return %[[RESULT]] : tensor<2x2xf32> +} + +func @DontRemoveTrivialAdd2(%arg0: tensor, %arg1: tensor<2x2xf32>) -> tensor { + %cst = constant dense<0.0> : tensor<2x2xf32> + %0 = "tf.AddV2"(%arg0, %arg1) : (tensor, tensor<2x2xf32>) -> tensor + %1 = "tf.AddV2"(%0, %cst) : (tensor , tensor<2x2xf32>) -> tensor + return %1 :tensor + + // CHECK-LABEL: DontRemoveTrivialAdd2 + // CHECK: %[[CONST:.*]] = constant dense<0.000000e+00> : tensor<2x2xf32> + // CHECK: %[[add:.*]] = "tf.AddV2"(%arg0, %arg1) : (tensor, tensor<2x2xf32>) -> tensor + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%[[add]], %[[CONST]]) : (tensor, tensor<2x2xf32>) -> tensor + // CHECK: 
return %[[RESULT]] : tensor +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/tf-data-pipeline.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/tf-data-pipeline.pbtxt new file mode 100644 index 00000000000..1e640baa507 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/tf-data-pipeline.pbtxt @@ -0,0 +1,256 @@ +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-output-arrays=BatchDatasetV2 -o - | FileCheck %s --dump-input-on-failure + +# CHECK-LABEL: func @main() -> tensor<*x!tf.variant> +# CHECK: %[[tensor_slice:.*]], %[[tensor_slice_control:.*]] = tf_executor.island wraps "tf.TensorSliceDataset" +# CHECK: %[[map_dataset:.*]], %[[map_dataset_control:.*]] = tf_executor.island wraps "tf.MapDataset"(%[[tensor_slice]] +# CHECK: %[[batch_dataset:.*]], %[[batch_dataset_control:.*]] = tf_executor.island wraps "tf.BatchDatasetV2"(%[[map_dataset]] + +node { + name: "tensors/normalize_tensors/component_0" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "tensors/normalize_tensors/component_0" + attr { + key: "Toutput_types" + value { + list { + type: DT_INT32 + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "MapDataset" + op: "MapDataset" + input: "TensorSliceDataset" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_map__8" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + } + } + } + attr { + key: "preserve_cardinality" + value { + b: false + } + } + attr { + key: "use_inter_op_parallelism" + value { + b: true + } + } +} +node { + name: "batch_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 5 + } + } + } +} +node { + name: "drop_remainder" + op: "Const" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: false + } + } + } +} +node { + name: "BatchDatasetV2" + op: "BatchDatasetV2" + input: "MapDataset" + input: "batch_size" + input: "drop_remainder" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + } + } + } + attr { + key: "parallel_copy" + value { + b: false + } + } +} +library { + function { + signature { + name: "__inference_Dataset_map__8" + input_arg { + name: "args_0" + type: DT_INT32 + } + output_arg { + name: "identity" + type: DT_INT32 + } + } + node_def { + name: "mul/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + } + node_def { + name: "mul" + op: "Mul" + input: "args_0" + input: "mul/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "mul:z:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + ret { + key: "identity" + value: 
"Identity:output:0" + } + arg_attr { + key: 0 + value { + attr { + key: "_user_specified_name" + value { + s: "args_0" + } + } + } + } + } +} +versions { + producer: 134 + min_consumer: 12 +} + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/stringescape.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/stringescape.mlir index 1ab0195f33a..4b6600d3b16 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/stringescape.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/stringescape.mlir @@ -11,7 +11,7 @@ func @main() { // CHECK-NEXT: value { // CHECK-NEXT: s: " 0\n\000\000" tf_executor.graph { - %0:2 = tf_executor.island wraps "tf.Empty"() {name = "dummy", dtype = "tfdtype$DT_INT32", value = "\200\n\00\00", listvalue = ["\20\0A"]} : () -> tensor<2xi32> + %0:2 = tf_executor.island wraps "tf.Placeholder"() {name = "dummy", dtype = "tfdtype$DT_INT32", value = "\200\n\00\00", listvalue = ["\20\0A"]} : () -> tensor<2xi32> tf_executor.fetch } return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/type_list_attr.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/type_list_attr.mlir index 4a09af84438..466c5adb0e5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/type_list_attr.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/type_list_attr.mlir @@ -14,7 +14,7 @@ func @main() { // CHECK-NEXT: type: DT_FLOAT // CHECK-NEXT: } // CHECK-NEXT: } - %0:2 = tf_executor.island wraps "tf.Empty"() {name = "dummy", dtype = "tfdtype$DT_FLOAT", emptylist = [], typelist = ["tfdtype$DT_INT32", "tfdtype$DT_FLOAT"]} : () -> tensor<*xi32> + %0:2 = tf_executor.island wraps "tf.Placeholder"() {name = "dummy", dtype = "tfdtype$DT_FLOAT", emptylist = [], typelist = ["tfdtype$DT_INT32", "tfdtype$DT_FLOAT"]} : () -> tensor<*xi32> tf_executor.fetch } return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir index e7f4873594b..60663f4bd4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir @@ -1,11 +1,11 @@ // RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-promote-resources-to-args | FileCheck %s -dump-input-on-failure // One resource, one read. The initial value of the resource is read. -// CHECK-LABEL: func @main(%arg0: tensor {tf.resource_name = "x"}) -> tensor<2xf32> -func @main() -> tensor<2xf32> { +// CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) -> tensor<2xf32> +func @main(%arg0: tensor) -> tensor<2xf32> { // CHECK-NOT: "tf.VarHandleOp" // CHECK-NOT: "tf.ReadVariableOp" - // CHECK: %[[ADD:[0-9]*]] = "tf.AddV2"(%arg0, %[[CONST:[0-9]*]]) + // CHECK: %[[ADD:[0-9]*]] = "tf.AddV2"(%arg1, %[[CONST:[0-9]*]]) // CHECK: %[[PACK:[0-9]*]] = "tf.Pack"(%[[CONST]], %[[ADD]]) // CHECK: return %[[PACK]] %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor @@ -19,8 +19,8 @@ func @main() -> tensor<2xf32> { // ----- // One resource, one write. The initial value of the resource is not read. 
-// CHECK-LABEL: func @main() -> (tensor {tf.resource_name = "x"}) -func @main() { +// CHECK-LABEL: func @main(%arg0: tensor) -> (tensor {tf.resource_name = "x"}) +func @main(%arg0: tensor) { // CHECK-NOT: "tf.VarHandleOp" // CHECK-NOT: "tf.AssignVariableOp" // CHECK: return %[[CONST]] @@ -33,12 +33,12 @@ func @main() { // ----- // One resource, two reads using different resource handles. -// CHECK-LABEL: func @main(%arg0: tensor {tf.resource_name = "x"}) -> tensor<2xf32> -func @main() -> tensor<2xf32> { +// CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) -> tensor<2xf32> +func @main(%arg0: tensor) -> tensor<2xf32> { // CHECK-NOT: "tf.VarHandleOp" // CHECK-NOT: "tf.ReadVariableOp" - // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg0, %[[CONST:[0-9]*]]) - // CHECK: %[[ADD2:[0-9]*]] = "tf.AddV2"(%[[ADD1]], %arg0) + // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg1, %[[CONST:[0-9]*]]) + // CHECK: %[[ADD2:[0-9]*]] = "tf.AddV2"(%[[ADD1]], %arg1) // CHECK: %[[PACK:[0-9]*]] = "tf.Pack"(%[[CONST]], %[[ADD2]]) // CHECK: return %[[PACK]] @@ -56,12 +56,12 @@ func @main() -> tensor<2xf32> { // ----- // Two resources, two reads using different resources. -// CHECK-LABEL: func @main(%arg0: tensor {tf.resource_name = "x"}, %arg1: tensor {tf.resource_name = "y"}) -> tensor<2xf32> -func @main() -> tensor<2xf32> { +// CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}, %arg2: tensor {tf.resource_name = "y"}) -> tensor<2xf32> +func @main(%arg0: tensor) -> tensor<2xf32> { // CHECK-NOT: "tf.VarHandleOp" // CHECK-NOT: "tf.ReadVariableOp" - // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg0, %[[CONST:[0-9]*]]) - // CHECK: %[[ADD2:[0-9]*]] = "tf.AddV2"(%[[ADD1]], %arg1) + // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg1, %[[CONST:[0-9]*]]) + // CHECK: %[[ADD2:[0-9]*]] = "tf.AddV2"(%[[ADD1]], %arg2) // CHECK: %[[PACK:[0-9]*]] = "tf.Pack"(%[[CONST]], %[[ADD2]]) // CHECK: return %[[PACK]] @@ -79,12 +79,12 @@ func @main() -> tensor<2xf32> { // ----- // One resource with read and write. The initial value of the resource is read. -// CHECK-LABEL: func @main(%arg0: tensor {tf.aliasing_output = 1 : i64, tf.resource_name = "x"}) -> (tensor<2xf32>, tensor) -func @main() -> tensor<2xf32> { +// CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.aliasing_output = 1 : i64, tf.resource_name = "x"}) -> (tensor<2xf32>, tensor) +func @main(%arg0: tensor) -> tensor<2xf32> { // CHECK-NOT: "tf.AssignVariableOp" - // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg0, %{{[0-9]*}}) + // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%arg1, %{{[0-9]*}}) // CHECK: %[[ADD2:[0-9]*]] = "tf.AddV2"(%[[ADD1]], %[[ADD1]]) - // CHECK: %[[PACK:[0-9]*]] = "tf.Pack"(%arg0, %[[ADD2]]) + // CHECK: %[[PACK:[0-9]*]] = "tf.Pack"(%arg1, %[[ADD2]]) // CHECK: return %[[PACK]], %[[ADD1]] %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor @@ -102,8 +102,8 @@ func @main() -> tensor<2xf32> { // ----- // One resource with read and write. The initial value of the resource is not read. 
-// CHECK-LABEL: func @main() -> (tensor<2xf32>, tensor {tf.resource_name = "x"}) -func @main() -> tensor<2xf32> { +// CHECK-LABEL: func @main(%arg0: tensor) -> (tensor<2xf32>, tensor {tf.resource_name = "x"}) +func @main(%arg0: tensor) -> tensor<2xf32> { // CHECK-NOT: "tf.AssignVariableOp" // CHECK: %[[CONST:[a-z0-9]+]] = "tf.Const"() {value = dense<4.200000e+01> : tensor} // CHECK: %[[ADD1:[0-9]*]] = "tf.AddV2"(%[[CONST]], %[[CONST]]) @@ -138,8 +138,8 @@ func @cond_true(%arg0: tensor>>, %arg1: tensor) -> return %2 : tensor } -// CHECK-LABEL: func @main(%arg0: tensor {tf.resource_name = "x"}) -> tensor<2xf32> -func @main() -> tensor<2xf32> attributes {tf.entry_function = {inputs = "", outputs = "result"}} { +// CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) -> tensor<2xf32> +func @main(%arg0: tensor) -> tensor<2xf32> attributes {tf.entry_function = {inputs = "", outputs = "result"}} { %0 = "tf.Const"() {value = dense<1.050000e+03> : tensor} : () -> tensor %1 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> %2 = "tf.ReadVariableOp"(%1) : (tensor>>) -> tensor @@ -157,10 +157,11 @@ func @main() -> tensor<2xf32> attributes {tf.entry_function = {inputs = "", outp // Tests resource passed in as an argument is not modified and not returned. // CHECK-LABEL: func @main -// CHECK-SAME: %[[ARG_0:[a-z0-9]+]]: tensor -func @main(%arg0: tensor>>) { - %0 = "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor - // CHECK-NEXT: "tf.AddV2"(%[[ARG_0]], %[[ARG_0]]) +// CHECK-SAME: %arg0: tensor +// CHECK-SAME: %[[ARG_1:[a-z0-9]+]]: tensor +func @main(%arg0: tensor, %arg1: tensor>>) { + %0 = "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor + // CHECK-NEXT: "tf.AddV2"(%[[ARG_1]], %[[ARG_1]]) %1 = "tf.AddV2"(%0, %0) : (tensor, tensor) -> tensor // CHECK-NEXT: return return @@ -171,9 +172,10 @@ func @main(%arg0: tensor>>) { // Tests resource passed in as an argument is modified but not returned. // CHECK-LABEL: func @main -// CHECK-SAME: %[[ARG_0:[a-z0-9]+]]: tensor {tf.aliasing_output = 0 : i64} +// CHECK-SAME: %{{[a-z0-9]+}}: tensor {tf.aliasing_output = 0 : i64} +// CHECK-SAME: %arg1: tensor // CHECK-SAME: -> tensor -func @main(%arg0: tensor>>) { +func @main(%arg0: tensor>>, %arg1: tensor) { // CHECK-NEXT: %[[CONST:[a-z0-9]+]] = "tf.Const" %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor "tf.AssignVariableOp"(%arg0, %0) : (tensor>>, tensor) -> () @@ -186,9 +188,10 @@ func @main(%arg0: tensor>>) { // Tests last resource assign is returned as a result. // CHECK-LABEL: func @main -// CHECK-SAME: %[[ARG_0:[a-z0-9]+]]: tensor {tf.aliasing_output = 0 : i64} +// CHECK-SAME: %{{[a-z0-9]+}}: tensor {tf.aliasing_output = 0 : i64} +// CHECK-SAME: %arg1: tensor // CHECK-SAME: -> tensor -func @main(%arg0: tensor>>) { +func @main(%arg0: tensor>>, %arg1: tensor) { %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor "tf.AssignVariableOp"(%arg0, %0) : (tensor>>, tensor) -> () // CHECK: %[[CONST:[a-z0-9]+]] = "tf.Const"() {value = dense<1.050000e+03> : tensor} @@ -204,9 +207,10 @@ func @main(%arg0: tensor>>) { // returns the same value prior. 
// CHECK-LABEL: func @main -// CHECK-SAME: %[[ARG_0:[a-z0-9]+]]: tensor {tf.aliasing_output = 1 : i64} +// CHECK-SAME: %{{[a-z0-9]+}}: tensor {tf.aliasing_output = 1 : i64} +// CHECK-SAME: %arg1: tensor // CHECK-SAME: -> (tensor, tensor) -func @main(%arg0: tensor>>) -> tensor { +func @main(%arg0: tensor>>, %arg1: tensor) -> tensor { %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor "tf.AssignVariableOp"(%arg0, %0) : (tensor>>, tensor) -> () // CHECK: %[[CONST:[a-z0-9]+]] = "tf.Const"() {value = dense<1.050000e+03> : tensor} @@ -221,9 +225,10 @@ func @main(%arg0: tensor>>) -> tensor { // Tests read interleaved between writes. // CHECK-LABEL: func @main -// CHECK-SAME: %[[ARG_0:[a-z0-9]+]]: tensor {tf.aliasing_output = 1 : i64} +// CHECK-SAME: %{{[a-z0-9]+}}: tensor {tf.aliasing_output = 1 : i64} +// CHECK-SAME: %arg1: tensor // CHECK-SAME: -> (tensor, tensor) -func @main(%arg0: tensor>>) -> tensor { +func @main(%arg0: tensor>>, %arg1: tensor) -> tensor { // CHECK-NEXT: %[[CONST_0:[a-z0-9]+]] = "tf.Const"() {value = dense<4.200000e+01> : tensor} %0 = "tf.Const"() {value = dense<4.200000e+01> : tensor} : () -> tensor "tf.AssignVariableOp"(%arg0, %0) : (tensor>>, tensor) -> () @@ -271,7 +276,7 @@ func @main(%arg0: tensor>>, %arg1: tensor>>) -> tensor { %0 = "tf.VarIsInitializedOp"(%arg0) : (tensor>>) -> tensor + %1 = "tf.UnknownOp"(%arg0) : (tensor>>) -> tensor return %0 : tensor } @@ -323,7 +329,7 @@ func @main(%arg0: tensor>>) -> tensor { // Tests VarHandleOp has users that are not removed. func @main() -> tensor { - // expected-error@+1 {{expects no uses but used by operations: tf.UnknownOp, tf.VarIsInitializedOp}} + // expected-error@+1 {{expects users to be 'tf.ReadVariableOp' or 'tf.AssignVariableOp', got [tf.UnknownOp, tf.VarIsInitializedOp]}} %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> %1 = "tf.VarIsInitializedOp"(%0) : (tensor>>) -> tensor %2 = "tf.UnknownOp"(%0) : (tensor>>) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir new file mode 100644 index 00000000000..8b8a070cfab --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir @@ -0,0 +1,59 @@ +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-promote-var-handles-to-args | FileCheck %s -dump-input-on-failure + +// Tests main function with multiple blocks. 
+ +// expected-error@+1 {{expects function 'main' to have 1 block, got 2}} +func @main() { + br ^bb1 +^bb1: + return +} + +// ----- + +// CHECK-LABEL: func @no_args +// CHECK-SAME: (%arg0: tensor {tf.resource_name = "x"}) +// CHECK-NOT: "tf.VarHandleOp" +func @no_args() { + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor + return +} + +// CHECK-LABEL: func @some_args +// CHECK-SAME: (%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) +// CHECK-NOT: "tf.VarHandleOp" +func @some_args(%arg0: tensor) { + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor + return +} + +// CHECK-LABEL: func @unique_vars +// CHECK-SAME: (%arg0: tensor>> {tf.resource_name = "x"}, %arg1: tensor>> {tf.resource_name = "y"}) +// CHECK-NOT: "tf.VarHandleOp" +func @unique_vars() { + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %1 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "y"} : () -> tensor>> + return +} + +// CHECK-LABEL: func @duplicate_vars +// CHECK-SAME: (%arg0: tensor>> {tf.resource_name = "x"}) +// CHECK-NOT: "tf.VarHandleOp" +func @duplicate_vars() { + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %1 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + return +} + +// CHECK-LABEL: func @duplicate_vars_with_users +// CHECK-SAME: (%arg0: tensor, %arg1: tensor>> {tf.resource_name = "x"}) +// CHECK: "tf.ReadVariableOp"(%arg1) +// CHECK: "tf.AssignAddVariableOp"(%arg1, %arg0) +// CHECK-NOT: "tf.VarHandleOp" +func @duplicate_vars_with_users(%arg0: tensor) { + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %1 = "tf.ReadVariableOp"(%0) : (tensor>>) -> tensor + %2 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + "tf.AssignAddVariableOp"(%2, %arg0) : (tensor>>, tensor) -> () + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir index cfbd112a7c2..8da252fc832 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir @@ -18,11 +18,10 @@ func @controls_per_replica() { return } -// CHECK: %[[CT_0:[0-9]*]] = tf_executor.ControlTrigger -// CHECK: %[[CT_1:[0-9]*]] = tf_executor.ControlTrigger -// CHECK: %[[ISLAND_0:[a-z_0-9]*]] = tf_executor.island(%[[CT_0]], %[[CT_1]]) -// CHECK: %[[ISLAND_1:[a-z_0-9]*]] = tf_executor.island(%[[CT_0]], %[[CT_1]]) -// CHECK: %[[ISLAND_2:[a-z_0-9]*]] = tf_executor.island(%[[ISLAND_0]], %[[ISLAND_1]]) +// CHECK: %[[CT_0:.*]] = tf_executor.ControlTrigger +// CHECK: %[[CT_1:.*]] = tf_executor.ControlTrigger +// CHECK: %{{.*}} = tf_executor.island(%[[CT_0]], %[[CT_1]]) +// CHECK: %{{.*}} = tf_executor.island(%[[CT_0]], %[[CT_1]]) // Tests devices are not remapped if no devices were defined in replicate. @@ -100,64 +99,45 @@ func @remap_device() { // CHECK: device = "/GPU:1" -// Tests unused per replica island are added as a control dependency to the -// island forwarding per replica results. 
-// CHECK-LABEL: func @unused_replica_control -// CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor, %[[ARG_1:[a-z0-9]*]]: tensor) -func @unused_replica_control(%arg0: tensor, %arg1: tensor) { - %0 = tf_executor.graph { - %1 = tf_executor.ControlTrigger {} - %2:2 = tf_executor.island(%1) { - %3:4 = tf_device.replicate([%arg0, %arg1] as %ri: tensor) {n = 2 : i32} { - %4 = "tf.opA"(%ri) : (tensor) -> tensor - %5 = "tf.opB"(%4) : (tensor) -> tensor - tf_device.return %4, %5 : tensor, tensor +// Tests replicate with control dependency output has each expanded replica +// control pinned to a sink island. +// CHECK-LABEL: func @replicate_control +func @replicate_control() { + tf_executor.graph { + %1 = tf_executor.island { + tf_device.replicate {n = 2 : i32} { + tf_device.return } - tf_executor.yield %3#0 : tensor + tf_executor.yield } - tf_executor.fetch %2#0 : tensor + tf_executor.fetch %1 : !tf_executor.control } return } -// CHECK: %[[CT:[0-9]*]] = tf_executor.ControlTrigger -// CHECK: %[[ISLAND_0:[a-z_0-9]*]]:2, %{{.*}} = tf_executor.island(%[[CT]]) -// CHECK: %[[OP_A_0:[0-9]*]] = "tf.opA"(%[[ARG_0]]) -// CHECK: %[[OP_B_0:[0-9]*]] = "tf.opB"(%[[OP_A_0]]) -// CHECK: tf_executor.yield %[[OP_A_0]], %[[OP_B_0]] -// CHECK: %[[ISLAND_1:[a-z_0-9]*]]:2, %[[ISLAND_1_control:[a-z_0-9]*]] = tf_executor.island(%[[CT]]) -// CHECK: %[[OP_A_1:[0-9]*]] = "tf.opA"(%[[ARG_1]]) -// CHECK: %[[OP_B_1:[0-9]*]] = "tf.opB"(%[[OP_A_1]]) -// CHECK: tf_executor.yield %[[OP_A_1]], %[[OP_B_1]] -// CHECK: %[[ISLAND_2:.*]], %[[ISLAND_2_control:.*]] = tf_executor.island(%[[ISLAND_1_control]]) -// CHECK: tf_executor.yield %[[ISLAND_0]]#0 -// CHECK: tf_executor.fetch %[[ISLAND_2]] +// CHECK: %[[REPLICA_0:.*]] = tf_executor.island +// CHECK: %[[REPLICA_1:.*]] = tf_executor.island +// CHECK: %[[SINK:.*]] = tf_executor.island(%[[REPLICA_0]], %[[REPLICA_1]]) +// CHECK: tf_executor.fetch %[[SINK]] -// Tests replicate with dynamic result shapes uses its inner ops to determine -// types for sink island. -// CHECK-LABEL: func @replicate_body_result_types -func @replicate_body_result_types() { - "tf_executor.graph"() ( { - %0:3 = "tf_executor.island"() ( { - %1:2 = "tf_device.replicate"() ( { - ^bb0: - %a = "tf.opA"() : () -> tensor - "tf_device.return"(%a) : (tensor) -> () - }) {n = 2 : i32} : () -> (tensor<*xi1>, tensor<*xi1>) - "tf_executor.yield"(%1#0, %1#1) : (tensor<*xi1>, tensor<*xi1>) -> () - }) : () -> (tensor<*xi1>, tensor<*xi1>, !tf_executor.control) - "tf_executor.fetch"(%0#2) : (!tf_executor.control) -> () - }) : () -> () +// Tests replicate results are remapped correctly. 
+// CHECK-LABEL: func @replicate_result +func @replicate_result(%arg0: tensor, %arg1: tensor) { + %0:4 = tf_executor.graph { + %1:5 = tf_executor.island { + %2:4 = tf_device.replicate([%arg0, %arg1] as %arg2: tensor) {n = 2 : i32} { + %3 = "tf.opA"(%arg2) : (tensor) -> tensor + %4 = "tf.opB"(%arg2) : (tensor) -> tensor + tf_device.return %3, %4 : tensor, tensor + } + tf_executor.yield %2#0, %2#1, %2#2, %2#3 : tensor, tensor, tensor, tensor + } + tf_executor.fetch %1#0, %1#1, %1#2, %1#3 : tensor, tensor, tensor, tensor + } return } -// CHECK: %[[ISLAND_0:.*]], %{{.*}} = tf_executor.island -// CHECK-NEXT: %[[OP_A_0:.*]] = "tf.opA"() -// CHECK-NEXT: tf_executor.yield %[[OP_A_0]] : tensor -// CHECK: %[[ISLAND_1:.*]], %{{.*}} = tf_executor.island -// CHECK-NEXT: %[[OP_A_1:.*]] = "tf.opA"() -// CHECK-NEXT: tf_executor.yield %[[OP_A_1]] : tensor -// CHECK: %[[ISLAND_2:.*]]:2, %[[ISLAND_2_CTRL:.*]] = tf_executor.island -// CHECK-NEXT: tf_executor.yield %[[ISLAND_0]], %[[ISLAND_1]] : tensor, tensor -// CHECK: tf_executor.fetch %[[ISLAND_2_CTRL]] : !tf_executor.control +// CHECK: %[[REPLICA_0:.*]]:2, %{{.*}} = tf_executor.island +// CHECK: %[[REPLICA_1:.*]]:2, %{{.*}} = tf_executor.island +// CHECK: tf_executor.fetch %[[REPLICA_0]]#0, %[[REPLICA_1]]#0, %[[REPLICA_0]]#1, %[[REPLICA_1]]#1 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index 793c9a601cc..9e7358ab2f5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -9,17 +9,17 @@ func @only_resource_load() -> tensor<*xi32> { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = i32} - // CHECK: "tf_device.launch" + // CHECK: "tf_device.cluster" // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"(%[[RES_READ_VAL]]) // CHECK: tf_device.return %[[COMPUTE_RES]] - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} + // CHECK: {cluster_attr = "cluster_attr"} // CHECK-SAME: () -> tensor<*xi32> - %1 = "tf_device.launch"() ( { + %1 = "tf_device.cluster"() ( { %2 = "tf.ReadVariableOp"(%0) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> %3 = "tf.SomeComputation"(%2) : (tensor<*xi32>) -> (tensor<*xi32>) tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<*xi32> return %1 : tensor<*xi32> } @@ -34,20 +34,20 @@ func @only_resource_store() -> tensor<*xi32> { // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - // CHECK: %[[LAUNCH_RES:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[CLUSTER_RES:[0-9]*]]:2 = "tf_device.cluster" // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"() // CHECK: tf_device.return %[[COMPUTE_RES]], %[[COMPUTE_RES]] - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} + // CHECK: {cluster_attr = "cluster_attr"} // CHECK-SAME: () -> (tensor<*xi32>, tensor<*xi32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = i32} + // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[CLUSTER_RES]]#1) {dtype = i32} - %1 = "tf_device.launch"() ( { + %1 = "tf_device.cluster"() ( { %2 = "tf.SomeComputation"() : () -> (tensor<*xi32>) "tf.AssignVariableOp"(%0, %2) {dtype = i32} : 
(tensor<*x!tf.resource>, tensor<*xi32>) -> () tf_device.return %2 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<*xi32> - // CHECK: return %[[LAUNCH_RES]]#0 + // CHECK: return %[[CLUSTER_RES]]#0 return %1 : tensor<*xi32> } @@ -62,21 +62,21 @@ func @same_resource_load_and_store() -> tensor<*xi32> { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = i32} - // CHECK: %[[LAUNCH_RES:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[CLUSTER_RES:[0-9]*]]:2 = "tf_device.cluster" // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"(%[[RES_READ_VAL]]) // CHECK: tf_device.return %[[COMPUTE_RES]], %[[COMPUTE_RES]] - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} + // CHECK: {cluster_attr = "cluster_attr"} // CHECK-SAME: () -> (tensor<*xi32>, tensor<*xi32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = i32} + // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[CLUSTER_RES]]#1) {dtype = i32} - %1 = "tf_device.launch"() ( { + %1 = "tf_device.cluster"() ( { %2 = "tf.ReadVariableOp"(%0) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> %3 = "tf.SomeComputation"(%2) : (tensor<*xi32>) -> (tensor<*xi32>) "tf.AssignVariableOp"(%0, %3) {dtype = i32} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<*xi32> - // CHECK: return %[[LAUNCH_RES]]#0 + // CHECK: return %[[CLUSTER_RES]]#0 return %1 : tensor<*xi32> } @@ -87,8 +87,8 @@ func @same_resource_load_and_store() -> tensor<*xi32> { // CHECK-LABEL: func @internal_resource func @internal_resource() -> tensor<*xi32> { - // CHECK: %[[LAUNCH_RES:[0-9]*]] = "tf_device.launch" - %0 = "tf_device.launch"() ( { + // CHECK: %[[CLUSTER_RES:[0-9]*]] = "tf_device.cluster" + %0 = "tf_device.cluster"() ( { // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> @@ -104,9 +104,9 @@ func @internal_resource() -> tensor<*xi32> { // CHECK: tf_device.return %[[COMPUTE_RES]] tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<*xi32> - // CHECK: return %[[LAUNCH_RES]] + // CHECK: return %[[CLUSTER_RES]] return %0 : tensor<*xi32> } @@ -120,12 +120,12 @@ func @lifting_failure() -> tensor<*xi32> { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> // expected-error @+1 {{has remaining resource inputs that can not be lifted}} - %1 = "tf_device.launch"() ( { + %1 = "tf_device.cluster"() ( { %2 = "tf.ReadVariableOp"(%0) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> %3 = "tf.SomeResourceOp"(%0, %2) : (tensor<*x!tf.resource>, tensor<*xi32>) -> tensor<*xi32> "tf.AssignVariableOp"(%0, %3) {dtype = i32} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<*xi32> return %1 : tensor<*xi32> } @@ -135,16 +135,16 @@ func @lifting_failure() -> tensor<*xi32> { // Tests that pass lifts resource reads/writes from a loop, and removed unused // resources. 
-// CHECK-LABEL: func @launch_with_loop -func @launch_with_loop() -> () { +// CHECK-LABEL: func @cluster_with_loop +func @cluster_with_loop() -> () { // CHECK: %[[COUNT:.*]] = "tf.Const"() {value = dense<10> : tensor} %0 = "tf.Const"() {value = dense<10> : tensor} : () -> tensor // CHECK: %[[VH:.*]] = "tf.VarHandleOp"() %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> %unused = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> // CHECK: %[[READ:.*]] = "tf.ReadVariableOp"(%[[VH]]) - // CHECK: %[[LAUNCH:.*]] = "tf_device.launch"() - "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]] = "tf_device.cluster"() + "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]]:2 = "tf.While"(%[[COUNT]], %[[READ]]) %2:3 = "tf.While"(%0, %1, %unused) {body = @while_body, cond = @while_cond, device = "", is_stateless = false, @@ -153,9 +153,9 @@ func @launch_with_loop() -> () { -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]]#1 : tensor tf_device.return - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () - // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[LAUNCH]]) + // CHECK: {cluster_attr = "cluster_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> () + // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[CLUSTER]]) // CHECK: return return } @@ -188,13 +188,13 @@ func @while_cond(%arg0: tensor, %arg1: tensor<*x!tf.resource>>, // Tests that pass lifts resource reads from loop condition. -// CHECK-LABEL: func @launch_with_loop -func @launch_with_loop() -> () { +// CHECK-LABEL: func @cluster_with_loop +func @cluster_with_loop() -> () { // CHECK: %[[VH:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[READ:.*]] = "tf.ReadVariableOp"(%[[VH]]) - // CHECK: %[[LAUNCH:.*]] = "tf_device.launch"() - "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]] = "tf_device.cluster"() + "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, @@ -203,9 +203,9 @@ func @launch_with_loop() -> () { -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor tf_device.return - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () - // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[LAUNCH]]) + // CHECK: {cluster_attr = "cluster_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> () + // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[CLUSTER]]) // CHECK: return return } @@ -230,13 +230,13 @@ func @while_cond(%arg0: tensor<*x!tf.resource>>) -> tensor { // Tests that pass lifts read-only resource reads from loop, but does not add // assign after the loop. 
-// CHECK-LABEL: func @launch_with_loop -func @launch_with_loop() -> () { +// CHECK-LABEL: func @cluster_with_loop +func @cluster_with_loop() -> () { // CHECK: %[[VH:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[READ:.*]] = "tf.ReadVariableOp"(%[[VH]]) - // CHECK: "tf_device.launch"() - "tf_device.launch"() ( { + // CHECK: "tf_device.cluster"() + "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, @@ -245,8 +245,8 @@ func @launch_with_loop() -> () { -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return tf_device.return - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> () - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + // CHECK: {cluster_attr = "cluster_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () // CHECK-NOT: "tf.AssignVariableOp" // CHECK: return return @@ -267,15 +267,15 @@ func @while_cond(%arg0: tensor<*x!tf.resource>>) -> tensor { // Tests that pass lifts resource reads from nested loops. -// CHECK-LABEL: func @launch_with_nested_loop -func @launch_with_nested_loop() -> () { +// CHECK-LABEL: func @cluster_with_nested_loop +func @cluster_with_nested_loop() -> () { // CHECK: %[[VH:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[VH_UNUSED:.*]] = "tf.VarHandleOp"() %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> // CHECK: %[[READ:.*]] = "tf.ReadVariableOp"(%[[VH]]) - // CHECK: %[[LAUNCH:.*]] = "tf_device.launch"() - "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]] = "tf_device.cluster"() + "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %2:2 = "tf.While"(%0, %1) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, @@ -284,9 +284,9 @@ func @launch_with_nested_loop() -> () { -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor tf_device.return - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () - // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[LAUNCH]]) + // CHECK: {cluster_attr = "cluster_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> () + // CHECK: "tf.AssignVariableOp"(%[[VH]], %[[CLUSTER]]) // CHECK: return return } @@ -330,15 +330,15 @@ func @while_cond1(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!t // Tests that pass reports error on non-aliasing while input/output resources. -func @launch_with_loop() -> () { +func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> - "tf_device.launch"() ( { + "tf_device.cluster"() ( { %1 = "tf.While"(%0) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } func @while_body(%arg0: tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) { @@ -355,15 +355,15 @@ func @while_cond(%arg0: tensor<*x!tf.resource>>) -> tensor { // Tests that pass reports error on unsupported ops in loop body. 
-func @launch_with_loop() -> () { +func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> - "tf_device.launch"() ( { + "tf_device.cluster"() ( { %1 = "tf.While"(%0) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } func @while_body(%arg0: tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) { @@ -380,15 +380,15 @@ func @while_cond(%arg0: tensor<*x!tf.resource>>) -> tensor { // Tests that pass reports error on unsupported ops in loop cond. -func @launch_with_loop() -> () { +func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> - "tf_device.launch"() ( { + "tf_device.cluster"() ( { %1 = "tf.While"(%0) { body = @while_body, cond = @while_cond, device = "", is_stateless = false, output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } func @while_body(%arg0: tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) { @@ -408,16 +408,16 @@ func @while_cond(%arg0: tensor<*x!tf.resource>>) -> tensor { // Tests that pass lifts resource reads from if branches. -// CHECK: func @launch_with_if(%[[ARG0:.*]]: tensor) -> tensor<4xf32> -func @launch_with_if(%arg0: tensor) -> tensor<4xf32> { +// CHECK: func @cluster_with_if(%[[ARG0:.*]]: tensor) -> tensor<4xf32> +func @cluster_with_if(%arg0: tensor) -> tensor<4xf32> { // CHECK: %[[VH0:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[VH1:.*]] = "tf.VarHandleOp"() %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> // CHECK-DAG: %[[READ0:.*]] = "tf.ReadVariableOp"(%[[VH0]]) // CHECK-DAG: %[[READ1:.*]] = "tf.ReadVariableOp"(%[[VH1]]) - // CHECK: %[[LAUNCH:.*]]:2 = "tf_device.launch"() - %2 = "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]]:2 = "tf_device.cluster"() + %2 = "tf_device.cluster"() ( { // CHECK: %[[IF:.*]]:2 = "tf.If"(%[[ARG0]], %[[READ0]], %[[READ1]]) %3:2 = "tf.If"(%arg0, %0, %1) {then_branch = @if_then, else_branch = @if_else, output_shapes = [#tf.shape<>, #tf.shape<4>], is_stateless = false} @@ -428,10 +428,10 @@ func @launch_with_if(%arg0: tensor) -> tensor<4xf32> { %5 = "tf.AddV2"(%4, %3#1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> // CHECK-NEXT: tf_device.return %[[ADD]], %[[IF]]#1 tf_device.return %5 : tensor<4xf32> - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> (tensor<4xf32>, tensor<4xf32>) - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<4xf32> - // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[LAUNCH]]#1) - // CHECK: return %[[LAUNCH]]#0 + // CHECK: {cluster_attr = "cluster_attr"} : () -> (tensor<4xf32>, tensor<4xf32>) + }) {cluster_attr = "cluster_attr"} : () -> tensor<4xf32> + // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[CLUSTER]]#1) + // CHECK: return %[[CLUSTER]]#0 return %2 : tensor<4xf32> } // CHECK: func @if_then(%[[TARG0:.*]]: tensor<4xf32>, %[[TARG1:.*]]: tensor<4xf32>) @@ -457,15 +457,15 @@ func @if_else(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf. 
// Tests that pass lifts resource reads from nested if ops. -// CHECK: func @launch_with_nested_if(%[[ARG0:.*]]: tensor) -> tensor -func @launch_with_nested_if(%arg0: tensor) -> tensor { +// CHECK: func @cluster_with_nested_if(%[[ARG0:.*]]: tensor) -> tensor +func @cluster_with_nested_if(%arg0: tensor) -> tensor { // CHECK: %[[VH0:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[VH1:.*]] = "tf.VarHandleOp"() %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> // CHECK-DAG: %[[READ0:.*]] = "tf.ReadVariableOp"(%[[VH0]]) - // CHECK: %[[LAUNCH:.*]]:2 = "tf_device.launch"() - %2 = "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]]:2 = "tf_device.cluster"() + %2 = "tf_device.cluster"() ( { // CHECK: %[[IF:.*]] = "tf.If"(%[[ARG0]], %[[READ0]]) %3 = "tf.If"(%arg0, %0, %1) {then_branch = @if_then, else_branch = @if_else, output_shapes = [], is_stateless = false} @@ -476,10 +476,10 @@ func @launch_with_nested_if(%arg0: tensor) -> tensor { %5 = "tf.AddV2"(%4, %4) : (tensor, tensor) -> tensor // CHECK-NEXT: tf_device.return %[[ADD]], %[[IF]] tf_device.return %5 : tensor - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> (tensor, tensor) - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[LAUNCH]]#1) - // CHECK: return %[[LAUNCH]]#0 + // CHECK: {cluster_attr = "cluster_attr"} : () -> (tensor, tensor) + }) {cluster_attr = "cluster_attr"} : () -> tensor + // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[CLUSTER]]#1) + // CHECK: return %[[CLUSTER]]#0 return %2 : tensor } // CHECK: func @if_then(%[[TARG0:.*]]: tensor) @@ -520,10 +520,10 @@ func @inner_if_else(%arg0: tensor<*x!tf.resource>>) // Tests that the pass reports error for ambiguous resource aliasing. -func @launch_with_if(%arg0: tensor) -> tensor<4xf32> { +func @cluster_with_if(%arg0: tensor) -> tensor<4xf32> { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> - %2 = "tf_device.launch"() ( { + %2 = "tf_device.cluster"() ( { // expected-error @+1 {{unsupported tf.IfOp output: resource does not alias a single input.}} %3 = "tf.If"(%arg0, %0, %1) {then_branch = @if_then, else_branch = @if_else, output_shapes = [#tf.shape<>], is_stateless = false} @@ -531,7 +531,7 @@ func @launch_with_if(%arg0: tensor) -> tensor<4xf32> { -> (tensor<*x!tf.resource>>) %4 = "tf.ReadVariableOp"(%3) : (tensor<*x!tf.resource>>) -> tensor<4xf32> tf_device.return %4 : tensor<4xf32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<4xf32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<4xf32> return %2 : tensor<4xf32> } func @if_then(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>) @@ -548,15 +548,15 @@ func @if_else(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf. // Tests that the pass lifts resources on two partitioned call ops sharing the // same callee. The lifting should clone the callee then modify the clone. 
-// CHECK-LABEL: @launch_with_partitioned_call -func @launch_with_partitioned_call() -> tensor { +// CHECK-LABEL: @cluster_with_partitioned_call +func @cluster_with_partitioned_call() -> tensor { // CHECK: %[[VH:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[CONST:.*]] = "tf.Const"() %1 = "tf.Const"() {value = dense<10.0> : tensor} : () -> tensor // CHECK: %[[READ:.*]] = "tf.ReadVariableOp"(%[[VH]]) - // CHECK: %[[LAUNCH:.*]] = "tf_device.launch"() - %2 = "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]] = "tf_device.cluster"() + %2 = "tf_device.cluster"() ( { // CHECK: %[[PC0:.*]] = "tf.PartitionedCall"(%[[CONST]], %[[READ]], %[[CONST]]) // CHECK-SAME: f = @callee_resource_lifted %3 = "tf.PartitionedCall"(%1, %0, %1) {f = @callee, config = "", config_proto = "", executor_type = ""} @@ -569,7 +569,7 @@ func @launch_with_partitioned_call() -> tensor { %5 = "tf.AddV2"(%3, %4) : (tensor, tensor) -> tensor // CHECK: tf_device.return %[[ADD]] : tensor tf_device.return %5 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> tensor return %2 : tensor } // CHECK: @callee(%[[OA0:.*]]: tensor, %[[OA1:.*]]: tensor<*x!tf.resource>>, %[[OA2:.*]]: tensor) -> tensor @@ -592,8 +592,8 @@ func @callee(%arg0: tensor, %arg1: tensor<*x!tf.resource>>, %ar // sharing the same callee. The lifting should clone the callee then modify the // clone. -// CHECK-LABEL: @launch_with_stateful_partitioned_call -func @launch_with_stateful_partitioned_call() -> () { +// CHECK-LABEL: @cluster_with_stateful_partitioned_call +func @cluster_with_stateful_partitioned_call() -> () { // CHECK: %[[VH0:.*]] = "tf.VarHandleOp"() %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> // CHECK: %[[VH1:.*]] = "tf.VarHandleOp"() @@ -602,8 +602,8 @@ func @launch_with_stateful_partitioned_call() -> () { %2 = "tf.Const"() {value = dense<10.0> : tensor} : () -> tensor // CHECK-DAG: %[[READ0:.*]] = "tf.ReadVariableOp"(%[[VH0]]) // CHECK-DAG: %[[READ1:.*]] = "tf.ReadVariableOp"(%[[VH1]]) - // CHECK: %[[LAUNCH:.*]] = "tf_device.launch"() - "tf_device.launch"() ( { + // CHECK: %[[CLUSTER:.*]] = "tf_device.cluster"() + "tf_device.cluster"() ( { // CHECK: %[[PC0:.*]] = "tf.StatefulPartitionedCall"(%[[READ0]], %[[READ1]], %[[CONST]]) // CHECK-SAME: f = @callee_resource_lifted %3 = "tf.StatefulPartitionedCall"(%0, %1, %2) {f = @callee, config = "", config_proto = "", executor_type = ""} @@ -614,9 +614,9 @@ func @launch_with_stateful_partitioned_call() -> () { : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor) -> tensor<*x!tf.resource>> // CHECK: tf_device.return %[[PC1]] : tensor tf_device.return - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () - // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[LAUNCH]]) + // CHECK: {cluster_attr = "cluster_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> () + // CHECK: "tf.AssignVariableOp"(%[[VH0]], %[[CLUSTER]]) return } // CHECK: @callee(%[[OA0:.*]]: tensor<*x!tf.resource>>, %[[OA1:.*]]: tensor<*x!tf.resource>>, %[[OA2:.*]]: tensor) -> tensor<*x!tf.resource>> @@ -637,17 +637,17 @@ func @callee(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.res // Tests that the pass reports error on called function that has resource output // which doesn't alias an input. 
-func @launch_with_stateful_partitioned_call() -> () { +func @cluster_with_stateful_partitioned_call() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> %2 = "tf.Const"() {value = dense<10.0> : tensor} : () -> tensor - "tf_device.launch"() ( { + "tf_device.cluster"() ( { %3 = "tf.StatefulPartitionedCall"(%0, %1, %2) {f = @callee, config = "", config_proto = "", executor_type = ""} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor) -> tensor<*x!tf.resource>> %4 = "tf.StatefulPartitionedCall"(%3, %1, %2) {f = @callee, config = "", config_proto = "", executor_type = ""} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor) -> tensor<*x!tf.resource>> tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } // expected-error @+1 {{unsupported function call: resource return value does not alias an input.}} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 1c979b96a9a..160bba94cfc 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -390,4 +390,24 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { } return } + + // CHECK-LABEL: dont_update_for_ref + func @dont_update_for_ref() -> () { + // CHECK: () -> tensor<4x!tf.f32ref> + %11 = "tf.VariableV2"() {container = "", device = "", shape = #tf.shape<4>, shared_name = ""} : () -> tensor<4x!tf.f32ref> + // CHECK: (tensor<4x!tf.f32ref>) -> tensor<4xf32> + %12 = "tf.Identity"(%11) {device = ""} : (tensor<4x!tf.f32ref>) -> tensor<4xf32> + // CHECK: (tensor<4xf32>) -> tensor<4xf32> + %13 = "tf.Neg"(%12) {device = ""} : (tensor<4xf32>) -> tensor<4xf32> + return + } + + // CHECK-LABEL: operand_as_shape + func @operand_as_shape(%18: tensor, %39: tensor<1x4x4x32xf32>) -> () { + %cst_5 = constant dense<512> : tensor + %19 = "tf.Pack"(%18, %cst_5) {N = 2 : i64, T = i32, axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + // CHECK: -> tensor<1x512xf32> + %40 = "tf.Reshape"(%39, %19) {T = f32, Tshape = i32, device = ""} : (tensor<1x4x4x32xf32>, tensor<2xi32>) -> tensor + return + } } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/sink_constant.mlir b/tensorflow/compiler/mlir/tensorflow/tests/sink_constant.mlir index 282fa4953a5..b9c6e242e70 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/sink_constant.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/sink_constant.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: func @sink_const func @sink_const(%arg0 : tensor<16xf32>) -> (tensor<16xf32>, tensor) { - // Verify that the constant are sunk in the tf_device.launch region using them + // Verify that the constant are sunk in the tf_device.cluster region using them // and removed if no other use is left. // Only the 2.0 and 3.0 constants are removed, the 4.0 has a use in the return @@ -13,11 +13,11 @@ func @sink_const(%arg0 : tensor<16xf32>) -> (tensor<16xf32>, tensor) { %2 = "tf.Const"() {value = dense<4.000000e+00> : tensor} : () -> tensor %3 = tf_executor.graph { %res, %ctl = tf_executor.island { - %3 = "tf_device.launch"() ({ + %3 = "tf_device.cluster"() ({ // In the device region, check that the 3 constants are materialized and // remapped to the uses. 
- // CHECK: tf_device.launch + // CHECK: tf_device.cluster // CHECK-DAG: %[[CST2:.*]] = "tf.Const"{{.*}}2.0 // CHECK-DAG: %[[CST3:.*]] = "tf.Const"{{.*}}3.0 // CHECK-DAG: %[[CST4:.*]] = "tf.Const"{{.*}}4.0 @@ -31,7 +31,7 @@ func @sink_const(%arg0 : tensor<16xf32>) -> (tensor<16xf32>, tensor) { %5 = "tf.Mul"(%4, %1) : (tensor<16xf32>, tensor) -> tensor<16xf32> %6 = "tf.Mul"(%5, %2) : (tensor<16xf32>, tensor) -> tensor<16xf32> tf_device.return %6 : tensor<16xf32> - }) {device = "tpu0"} : () -> tensor<16xf32> + }) {} : () -> tensor<16xf32> tf_executor.yield %3 : tensor<16xf32> } tf_executor.fetch %res : tensor<16xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 118ce2e8645..ffa287e0e53 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -881,20 +881,29 @@ func @testValidMatrixBandPartOpUnranked(%arg0: tensor<*xbf16>, %arg1: tensor, %arg1: tensor, %arg2: tensor) -> tensor<64x64xbf16> { - // expected-error @+1 {{op failed to verify that all of {input, band} have same type}} - %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<64x64xbf16> - return %0 : tensor<64x64xbf16> +// Test valid tf.MatrixBandPart +// CHECK-LABEL: func @testValidMatrixBandPartOpUnrankedBand +func @testValidMatrixBandPartOpUnrankedBand(%arg0: tensor<64x64x64xbf16>, %arg1: tensor, %arg2: tensor) -> tensor<*xbf16> { + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<*xbf16> + return %0 : tensor<*xbf16> +} + +// ----- + +// Test valid tf.MatrixBandPart +// CHECK-LABEL: func @testValidMatrixBandPartOpCompatibleDynamicShapes +func @testValidMatrixBandPartOpCompatibleDynamicShapes(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor + return %0 : tensor } // ----- // Test invalid tf.MatrixBandPart -func @testInvalidMatrixBandPartOp(%arg0: tensor<64x64x64xbf16>, %arg1: tensor, %arg2: tensor) -> tensor<*xbf16> { - // expected-error @+1 {{op failed to verify that all of {input, band} have same type}} - %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<*xbf16> - return %0 : tensor<*xbf16> +func @testInvalidMatrixBandPartOp(%arg0: tensor<64x64x64xbf16>, %arg1: tensor, %arg2: tensor) -> tensor<64x64xbf16> { + // expected-error @+1 {{op failed to verify that all of {input, band} have dynamically equal types}} + %0 = "tf.MatrixBandPart"(%arg0, %arg1, %arg2) : (tensor<64x64x64xbf16>, tensor, tensor) -> tensor<64x64xbf16> + return %0 : tensor<64x64xbf16> } // ----- diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_map_and_batch.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_map_and_batch.mlir new file mode 100644 index 00000000000..39f34caf259 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_map_and_batch.mlir @@ -0,0 +1,29 @@ +// RUN: tf-opt -tf-standard-pipeline -tf-data-optimization %s -o %t && FileCheck %s --dump-input-on-failure < %t + +module { +// CHECK-LABEL: fuse_map_and_batch +func @fuse_map_and_batch() -> tensor attributes {tf.entry_function = {control_outputs = "", inputs = "", outputs = "BatchDatasetV2"}} { + %0 = "tf.Const"() {value = dense<5> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<[0, 1, 2]> : 
tensor<3xi32>} : () -> tensor<3xi32> + // CHECK: %[[NPC:.*]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: %[[TSLICE:.*]] = "tf.TensorSliceDataset" + %3 = "tf.TensorSliceDataset"(%2) {device = "", output_shapes = [#tf.shape<>]} : (tensor<3xi32>) -> tensor<*x!tf.variant> + // CHECK: "tf.MapAndBatchDataset"(%[[TSLICE]], %[[BSIZE:.*]], %[[NPC]] + // CHECK-SAME: f = @"__inference_Dataset_map__80", + %4 = "tf.MapDataset"(%3) {device = "", + f = @"__inference_Dataset_map__80", + output_shapes = [#tf.shape<>], output_types = [i32], + preserve_cardinality = false, sloppy = false, + use_inter_op_parallelism = true} : (tensor<*x!tf.variant>) -> tensor + %5 = "tf.BatchDatasetV2"(%4, %0, %1) {device = "", output_shapes = [#tf.shape<>], output_types = [i32], parallel_copy = false} : (tensor, tensor, tensor) -> tensor + return %5 : tensor +} + +func @"__inference_Dataset_map__80"(%arg0: tensor<*xi32>) -> tensor<*xi32> { + %0 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %1 = "tf.Mul"(%arg0, %0) {device = ""} : (tensor<*xi32>, tensor) -> tensor<*xi32> + %2 = "tf.Identity"(%1) {device = ""} : (tensor<*xi32>) -> tensor<*xi32> + return %2 : tensor<*xi32> +} +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_pmap_and_batch.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_pmap_and_batch.mlir new file mode 100644 index 00000000000..70c5c220fe1 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_data_fuse_pmap_and_batch.mlir @@ -0,0 +1,29 @@ +// RUN: tf-opt -tf-standard-pipeline -tf-data-optimization %s -o %t && FileCheck %s --dump-input-on-failure < %t + +module { +// CHECK-LABEL: fuse_pmap_and_batch +func @fuse_pmap_and_batch() -> tensor attributes {tf.entry_function = {control_outputs = "", inputs = "", outputs = "BatchDatasetV2"}} { + %0 = "tf.Const"() {value = dense<5> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<[0, 1, 2]> : tensor<3xi32>} : () -> tensor<3xi32> + %3 = "tf.Const"() {value = dense<12> : tensor} : () -> tensor + // CHECK: %[[TSLICE:.*]] = "tf.TensorSliceDataset" + %4 = "tf.TensorSliceDataset"(%2) {device = "", output_shapes = [#tf.shape<>]} : (tensor<3xi32>) -> tensor<*x!tf.variant> + // CHECK: "tf.MapAndBatchDataset"(%[[TSLICE]], + // CHECK-SAME: f = @"__inference_Dataset_map__80", + %5 = "tf.ParallelMapDataset"(%4, %3) {device = "", + f = @"__inference_Dataset_map__80", + output_shapes = [#tf.shape<>], output_types = [i32], + preserve_cardinality = false, sloppy = false, + use_inter_op_parallelism = true} : (tensor<*x!tf.variant>, tensor) -> tensor + %6 = "tf.BatchDatasetV2"(%5, %0, %1) {device = "", output_shapes = [#tf.shape<>], output_types = [i32], parallel_copy = false} : (tensor, tensor, tensor) -> tensor + return %6 : tensor +} + +func @"__inference_Dataset_map__80"(%arg0: tensor<*xi32>) -> tensor<*xi32> { + %0 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %1 = "tf.Mul"(%arg0, %0) {device = ""} : (tensor<*xi32>, tensor) -> tensor<*xi32> + %2 = "tf.Identity"(%1) {device = ""} : (tensor<*xi32>) -> tensor<*xi32> + return %2 : tensor<*xi32> +} +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_cluster_formation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_cluster_formation.mlir index fbbbf05f116..6dceb00eefa 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_cluster_formation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_cluster_formation.mlir @@ -2,7 +2,7 @@ // Test ops in cluster only have 
`_tpu_replicate` and `device` attributes -// removed when moved to a launch. +// removed when moved to a `tf_device.cluster`. // CHECK-LABEL: func @cluster_ops_removed_attrs func @cluster_ops_removed_attrs() { %0 = "tf.opA"() {_tpu_replicate = "replicate", device = "device", name = "name"} : () -> tensor @@ -18,9 +18,9 @@ func @cluster_ops_removed_attrs() { // Test TPUReplicateMetadata ops `name` and `num_replicas` attributes are not -// copied over to launch. -// CHECK-LABEL: func @launch_removed_metadata_attrs -func @launch_removed_metadata_attrs() { +// copied over to `tf_device.cluster`. +// CHECK-LABEL: func @removed_metadata_attrs +func @removed_metadata_attrs() { %0 = "tf.opA"() {_tpu_replicate = "replicate"} : () -> tensor "tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", name = "name", num_replicas = 1, topology = "topology"} : () -> () return @@ -42,7 +42,7 @@ func @metadata_op_removed() { // Test ops in an island with the same `_tpu_replicate` attribute are merged -// under a launch. +// under a `tf_device.cluster`. // CHECK-LABEL: func @simple_island // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) func @simple_island(%arg0 : tensor) -> tensor { @@ -60,19 +60,19 @@ func @simple_island(%arg0 : tensor) -> tensor { } // CHECK: "tf.opB" -// CHECK: %[[LAUNCH:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_C:[0-9]*]] = "tf.opC"(%[[OP_A]]) // CHECK-NEXT: tf_device.return %[[OP_C]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: tf_executor.yield %[[LAUNCH]] +// CHECK: tf_executor.yield %[[CLUSTER]] // Test ops in an island with the same `_tpu_replicate` attribute are merged -// under a launch, even when the associated TPUReplicateMetadata op is in a -// different island. +// under a `tf_device.cluster`, even when the associated TPUReplicateMetadata op +// is in a different island. // CHECK-LABEL: func @simple_island_separate_metadata // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) func @simple_island_separate_metadata(%arg0 : tensor) -> tensor { @@ -92,18 +92,18 @@ func @simple_island_separate_metadata(%arg0 : tensor) -> tensor { } // CHECK: "tf.opB" -// CHECK: %[[LAUNCH:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_C:[0-9]*]] = "tf.opC"(%[[OP_A]]) // CHECK-NEXT: tf_device.return %[[OP_C]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: tf_executor.yield %[[LAUNCH]] +// CHECK: tf_executor.yield %[[CLUSTER]] // Test ops in multiple islands with the same `_tpu_replicate` attribute are -// merged under launch ops only within their respective island. +// merged under `tf_device.cluster` ops only within their respective island. 
// CHECK-LABEL: func @multiple_islands_separate_metadata // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) func @multiple_islands_separate_metadata(%arg0 : tensor) -> (tensor, tensor) { @@ -130,28 +130,28 @@ func @multiple_islands_separate_metadata(%arg0 : tensor) -> (tensor, ten // CHECK: %[[ISLAND_1:.*]], %[[ISLAND_1_control:.*]] = tf_executor.island { // CHECK: "tf.opB" -// CHECK: %[[LAUNCH_0:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER_0:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_C:[0-9]*]] = "tf.opC"(%[[OP_A]]) // CHECK-NEXT: tf_device.return %[[OP_C]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: tf_executor.yield %[[LAUNCH_0]] +// CHECK: tf_executor.yield %[[CLUSTER_0]] // CHECK: tf_executor.island { // CHECK: "tf.opE" -// CHECK: %[[LAUNCH_1:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER_1:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_D:[0-9]*]] = "tf.opD"(%[[ISLAND_1]]) // CHECK-NEXT: %[[OP_F:[0-9]*]] = "tf.opF"(%[[ARG_0]]) // CHECK-NEXT: tf_device.return %[[OP_F]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: tf_executor.yield %[[LAUNCH_1]] +// CHECK: tf_executor.yield %[[CLUSTER_1]] // Test ops in a function body with the same `_tpu_replicate` attribute are -// merged under a launch op. +// merged under a `tf_device.cluster` op. // CHECK-LABEL: func @ops_in_func_body // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) func @ops_in_func_body(%arg0 : tensor) -> (tensor, tensor, tensor) { @@ -167,7 +167,7 @@ func @ops_in_func_body(%arg0 : tensor) -> (tensor, tensor, tensor) -> (tensor, tensor, tensor) func @nested_cluster_op_user(%arg0 : tensor) -> (tensor) { @@ -193,7 +193,7 @@ func @nested_cluster_op_user(%arg0 : tensor) -> (tensor) { return %2 : tensor } -// CHECK: %[[LAUNCH:[0-9]*]]:2 = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]]:2 = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_B:[0-9]*]] = "tf.opB"(%[[OP_A]]) // CHECK-NEXT: tf_device.return %[[OP_A]], %[[OP_B]] @@ -201,8 +201,8 @@ func @nested_cluster_op_user(%arg0 : tensor) -> (tensor) { // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" // CHECK: tf_executor.graph { -// CHECK-NEXT: tf_executor.fetch %[[LAUNCH]]#0 -// CHECK: return %[[LAUNCH]]#1 +// CHECK-NEXT: tf_executor.fetch %[[CLUSTER]]#0 +// CHECK: return %[[CLUSTER]]#1 // Test nested op of a cluster with an operand from an op of the same cluster @@ -218,7 +218,7 @@ func @nested_cluster_op(%arg0 : tensor) -> (tensor) { return %1 : tensor } -// CHECK: %[[LAUNCH:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_B:[0-9]*]] = "tf.opB"() ( { // CHECK-NEXT: "tf.opC"(%[[OP_A]]) @@ -226,7 +226,7 @@ func @nested_cluster_op(%arg0 : tensor) -> (tensor) { // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: return %[[LAUNCH]] +// CHECK: return %[[CLUSTER]] // Test multiple clusters interleaved. 
@@ -242,21 +242,21 @@ func @interleaved_clusters(%arg0 : tensor) -> (tensor, tensor) { return %2, %3 : tensor, tensor } -// CHECK: %[[LAUNCH_0:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER_0:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_C:[0-9]*]] = "tf.opC"(%[[OP_A]]) // CHECK-NEXT: tf_device.return %[[OP_C]] // CHECK-NEXT: _tpu_replicate = "replicate_0" // CHECK-SAME: device = "device_0" // CHECK-SAME: topology = "topology_0" -// CHECK: %[[LAUNCH_1:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER_1:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_B:[0-9]*]] = "tf.opB"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_D:[0-9]*]] = "tf.opD"(%[[OP_B]]) // CHECK-NEXT: tf_device.return %[[OP_D]] // CHECK-NEXT: _tpu_replicate = "replicate_1" // CHECK-SAME: device = "device_1" // CHECK-SAME: topology = "topology_1" -// CHECK: return %[[LAUNCH_0]], %[[LAUNCH_1]] +// CHECK: return %[[CLUSTER_0]], %[[CLUSTER_1]] // Test operands and results of ops of a cluster that are interleaved between @@ -276,14 +276,14 @@ func @interleaved_cluster_operands_results() { // CHECK: %[[OP_C:[0-9]*]] = "tf.opC" // CHECK: %[[OP_E:[0-9]*]] = "tf.opE"(%[[OP_C]]) -// CHECK: %[[LAUNCH:[0-9]*]] = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]] = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA" // CHECK-NEXT: "tf.opF"(%[[OP_E]]) // CHECK-NEXT: tf_device.return %[[OP_A]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: %[[OP_B:[0-9]*]] = "tf.opB"(%[[LAUNCH]]) +// CHECK: %[[OP_B:[0-9]*]] = "tf.opB"(%[[CLUSTER]]) // CHECK: "tf.opD"(%[[OP_B]]) @@ -306,24 +306,24 @@ func @one_replica(%arg0: tensor) -> tensor { // CHECK: %[[OP_C:[0-9]*]] = "tf.opC" // CHECK: %[[OP_E:[0-9]*]] = "tf.opE"(%[[OP_C]]) -// CHECK: %[[LAUNCH:[0-9]*]]:2 = "tf_device.launch"() ( { +// CHECK: %[[CLUSTER:[0-9]*]]:2 = "tf_device.cluster"() ( { // CHECK-NEXT: %[[OP_A:[0-9]*]] = "tf.opA"(%[[ARG_0]]) // CHECK-NEXT: %[[OP_F:[0-9]*]] = "tf.opF"(%[[OP_E]]) // CHECK-NEXT: tf_device.return %[[OP_A]], %[[OP_F]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: %[[OP_B:[0-9]*]] = "tf.opB"(%[[LAUNCH]]#0) +// CHECK: %[[OP_B:[0-9]*]] = "tf.opB"(%[[CLUSTER]]#0) // CHECK: "tf.opD"(%[[OP_B]]) -// CHECK: return %[[LAUNCH]]#1 +// CHECK: return %[[CLUSTER]]#1 // CHECK-NOT: "tf.TPUReplicatedInput" // CHECK-NOT: "tf.TPUReplicatedOutput" // Test replication with replicated operands and replicated results. The cluster -// will be wrapped in a launch first and then by a replicate. TPUReplicatedInput -// and TPUReplicatedOutput nodes will be replaced by the replicate operands and -// results. +// will be wrapped in a `tf_device.cluster` first and then by a replicate. +// TPUReplicatedInput and TPUReplicatedOutput nodes will be replaced by the +// replicate operands and results. 
// CHECK-LABEL: func @replication // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor, %[[ARG_1:[a-z0-9]*]]: tensor, %[[ARG_2:[a-z0-9]*]]: tensor) func @replication(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> (tensor, tensor) { @@ -347,18 +347,18 @@ func @replication(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> // CHECK-DAG: [%[[ARG_0]], %[[OP_A]]] as %[[RI_0:[a-z0-9]*]]: tensor // CHECK-DAG: [%[[OP_B]], %[[ARG_1]]] as %[[RI_1:[a-z0-9]*]]: tensor // CHECK-SAME: n = 2 : i32 -// CHECK-NEXT: %[[LAUNCH:[0-9]*]]:2 = "tf_device.launch"() ( { +// CHECK-NEXT: %[[CLUSTER:[0-9]*]]:2 = "tf_device.cluster"() ( { // CHECK: %[[OP_D:[0-9]*]] = "tf.opD"(%[[RI_0]], %[[RI_1]], %[[ARG_2]], %[[OP_C]]) // CHECK: %[[OP_E:[0-9]*]] = "tf.opE"(%[[OP_D]], %[[RI_0]], %[[RI_1]], %[[ARG_2]], %[[OP_C]]) // CHECK: tf_device.return %[[OP_D]], %[[OP_E]] // CHECK-NEXT: _tpu_replicate = "replicate" // CHECK-SAME: device = "device" // CHECK-SAME: topology = "topology" -// CHECK: tf_device.return %[[LAUNCH]]#0, %[[LAUNCH]]#1 +// CHECK: tf_device.return %[[CLUSTER]]#0, %[[CLUSTER]]#1 // CHECK: return %[[REPLICATE]]#0, %[[REPLICATE]]#3 -// Test `tf.TPUReplicatedInput` ops are sorted by their `index` attribute. +// Test TPUReplicatedInput ops are sorted by their `index` attribute. // Non-negative `index` should precede `index` of -1, and ordering of ops with // `index` of -1 does not matter. // CHECK-LABEL: func @sort_replicated_input @@ -452,7 +452,7 @@ func @mismatched_replicated_output() { // Test cluster that should be replicated where its outputs do not lead to a // TPUReplicatedOutput. func @missing_replicated_output() { - // expected-error@+1 {{requires output of tf_device.launch to lead to a 'tf.TPUReplicatedOutput' op}} + // expected-error@+1 {{requires output of tf_device.cluster to lead to a 'tf.TPUReplicatedOutput' op}} %0 = "tf.opA"() {_tpu_replicate = "replicate", device = "device", name = "name"} : () -> tensor %1 = "tf.opB"(%0) : (tensor) -> tensor "tf.TPUReplicateMetadata"() {_tpu_replicate = "replicate", device = "device", num_replicas = 2, topology = "topology"} : () -> () @@ -520,8 +520,10 @@ func @input_index_gaps(%arg0: tensor) { return } + // ----- + // Test that the `is_mirrored_variable` attribute is preserved in the // tf_device.replicate op. 
// CHECK-LABEL: func @mirrored_variables @@ -537,4 +539,3 @@ func @mirrored_variables(%arg0: tensor>>, %arg1: ten // CHECK: tf_device.replicate // CHECK-SAME: [%[[ARG_0]], %[[ARG_1]]] as %{{[a-z0-9]*}} // CHECK-SAME: _mirrored_variable_indices = [1] - diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_dynamic_padding_mapper.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_dynamic_padding_mapper.mlir index ad2ebc08c1d..8b610e45b4e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_dynamic_padding_mapper.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_dynamic_padding_mapper.mlir @@ -10,7 +10,7 @@ // CHECK-LABEL: func @single_arg_single_shape func @single_arg_single_shape(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_0, %ri_1) {device = "", func = @func0, padding_map = ["\10\02\18\01"]} : (tensor, tensor) -> () + "tf_device.cluster_func"(%ri_0, %ri_1) {func = @func0, padding_map = ["\10\02\18\01"]} : (tensor, tensor) -> () tf_device.return } return @@ -37,7 +37,7 @@ func @func0(%arg0: tensor, %arg1: tensor) { // CHECK-LABEL: func @single_arg_multiple_shapes func @single_arg_multiple_shapes(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor, [%arg0, %arg0] as %ri_2: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_0, %ri_1, %ri_2) {device = "", func = @func1, padding_map = ["\10\02\18\01", "\10\03\18\02"]} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_0, %ri_1, %ri_2) {func = @func1, padding_map = ["\10\02\18\01", "\10\03\18\02"]} : (tensor, tensor, tensor) -> () tf_device.return } return @@ -69,7 +69,7 @@ func @func1(%arg0: tensor, %arg1: tensor, %arg2: tensor) { // CHECK-LABEL: func @multiple_args func @multiple_args(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor, [%arg0, %arg0] as %ri_2: tensor, [%arg0, %arg0] as %ri_3: tensor, [%arg0, %arg0] as %ri_4: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_0, %ri_1, %ri_2, %ri_3, %ri_4) {device = "", func = @func2, padding_map = ["\10\02\18\01", "\10\03\18\02", "\08\04\10\01\18\03"]} : (tensor, tensor, tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_0, %ri_1, %ri_2, %ri_3, %ri_4) {func = @func2, padding_map = ["\10\02\18\01", "\10\03\18\02", "\08\04\10\01\18\03"]} : (tensor, tensor, tensor, tensor, tensor) -> () tf_device.return } return @@ -90,7 +90,7 @@ func @func2(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tens // CHECK-LABEL: func @remap_indices func @remap_indices(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_1, %arg0, %ri_0) {device = "", func = @func3, padding_map = ["\10\02\18\01"]} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_1, %arg0, %ri_0) {func = @func3, padding_map = ["\10\02\18\01"]} : (tensor, tensor, tensor) -> () tf_device.return } return @@ -111,7 +111,7 @@ func @func3(%arg0: tensor, %arg1: tensor, %arg2: tensor) { // padding_arg_index: 1 // CHECK-LABEL: func @no_replicate func @no_replicate(%arg0: tensor) { - "tf_device.launch_func"(%arg0, %arg0, %arg0) {device = "", func = @func4, padding_map = ["\10\02\18\01"]} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%arg0, %arg0, %arg0) {func = @func4, padding_map = ["\10\02\18\01"]} : (tensor, tensor, tensor) -> () return } @@ -125,7 +125,7 @@ func @func4(%arg0: 
tensor, %arg1: tensor, %arg2: tensor) { // CHECK-LABEL: func @no_padding_map func @no_padding_map(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_1, %arg0, %ri_0) {device = "", func = @func5} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_1, %arg0, %ri_0) {func = @func5} : (tensor, tensor, tensor) -> () tf_device.return } return @@ -141,7 +141,7 @@ func @func5(%arg0: tensor, %arg1: tensor, %arg2: tensor) { // CHECK-LABEL: func @empty_padding_map func @empty_padding_map(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_1, %arg0, %ri_0) {device = "", func = @func6, padding_map = []} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_1, %arg0, %ri_0) {func = @func6, padding_map = []} : (tensor, tensor, tensor) -> () tf_device.return } return @@ -162,7 +162,7 @@ func @func6(%arg0: tensor, %arg1: tensor, %arg2: tensor) { // CHECK-LABEL: func @unused_padding_map func @unused_padding_map(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - "tf_device.launch_func"(%ri_1) {device = "", func = @func7, padding_map = ["\10\02\18\01"]} : (tensor) -> () + "tf_device.cluster_func"(%ri_1) {func = @func7, padding_map = ["\10\02\18\01"]} : (tensor) -> () tf_device.return } return @@ -189,7 +189,7 @@ func @func7(%arg0: tensor) { func @missing_padding_arg(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor, [%arg0, %arg0] as %ri_2: tensor, [%arg0, %arg0] as %ri_3: tensor) {n = 2 : i32} { // expected-warning@+1 {{bad 'padding_map' attribute at index 0, unused padding_arg_index 1}} - "tf_device.launch_func"(%ri_0, %ri_2, %ri_3) {device = "", func = @func8, padding_map = ["\10\02\18\01", "\08\02\10\02\18\03"]} : (tensor, tensor, tensor) -> () + "tf_device.cluster_func"(%ri_0, %ri_2, %ri_3) {func = @func8, padding_map = ["\10\02\18\01", "\08\02\10\02\18\03"]} : (tensor, tensor, tensor) -> () tf_device.return } return @@ -206,8 +206,8 @@ func @func8(%arg0: tensor, %arg1: tensor, %arg2: tensor) { // Test bad padding map attribute (not an array). func @bad_padding_map() { tf_device.replicate {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op requires 'padding_map' array attribute}} - "tf_device.launch_func"() {device = "", func = @_func, padding_map = 0 : i32} : () -> () + // expected-error@+1 {{'tf_device.cluster_func' op requires 'padding_map' array attribute}} + "tf_device.cluster_func"() {func = @_func, padding_map = 0 : i32} : () -> () tf_device.return } return @@ -222,8 +222,8 @@ func @_func() { // Test bad padding map attribute (element in array is not a string). func @bad_padding_map_element() { tf_device.replicate {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, not a string}} - "tf_device.launch_func"() {device = "", func = @_func, padding_map = [0 : i32]} : () -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at index 0, not a string}} + "tf_device.cluster_func"() {func = @_func, padding_map = [0 : i32]} : () -> () tf_device.return } return @@ -238,8 +238,8 @@ func @_func() { // Test unparsable padding map. 
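// Note on the `padding_map` byte strings used throughout these cases (decoded
// here for reference, not part of the patch): each entry is a serialized
// tensorflow::tpu::PaddingMap proto. With the usual proto wire format
// (tag byte = field_number << 3 | wire_type), \08, \10 and \18 are the varint
// tags of fields 1 (arg_index), 2 (shape_index) and 3 (padding_arg_index), and
// an omitted field defaults to 0. For example:
//
//   "\10\02\18\01"        ->  arg_index: 0, shape_index: 2, padding_arg_index: 1
//   "\08\04\10\01\18\03"  ->  arg_index: 4, shape_index: 1, padding_arg_index: 3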
func @bad_padding_map_proto() { tf_device.replicate {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, failed to parse 'z' as tensorflow::tpu::PaddingMap}} - "tf_device.launch_func"() {device = "", func = @_func, padding_map = ["z"]} : () -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at index 0, failed to parse 'z' as tensorflow::tpu::PaddingMap}} + "tf_device.cluster_func"() {func = @_func, padding_map = ["z"]} : () -> () tf_device.return } return @@ -259,8 +259,8 @@ func @_func() { // padding_arg_index: 1 func @negative_arg_index(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, arg_index must be in [0, 2), got -1}} - "tf_device.launch_func"(%ri_0, %ri_1) {device = "", func = @_func, padding_map = ["\08\FF\FF\FF\FF\FF\FF\FF\FF\FF\01\10\02\18\01"]} : (tensor, tensor) -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at index 0, arg_index must be in [0, 2), got -1}} + "tf_device.cluster_func"(%ri_0, %ri_1) {func = @_func, padding_map = ["\08\FF\FF\FF\FF\FF\FF\FF\FF\FF\01\10\02\18\01"]} : (tensor, tensor) -> () tf_device.return } return @@ -280,8 +280,8 @@ func @_func(%arg0: tensor, %arg1: tensor) { // padding_arg_index: 1 func @bad_arg_index(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, arg_index must be in [0, 2), got 2}} - "tf_device.launch_func"(%ri_0, %ri_1) {device = "", func = @_func, padding_map = ["\08\02\10\02\18\01"]} : (tensor, tensor) -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at index 0, arg_index must be in [0, 2), got 2}} + "tf_device.cluster_func"(%ri_0, %ri_1) {func = @_func, padding_map = ["\08\02\10\02\18\01"]} : (tensor, tensor) -> () tf_device.return } return @@ -301,8 +301,8 @@ func @_func(%arg0: tensor, %arg1: tensor) { // padding_arg_index: -1 func @negative_padding_arg_index(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, padding_arg_index must be in [0, 2), got -1}} - "tf_device.launch_func"(%ri_0, %ri_1) {device = "", func = @_func, padding_map = ["\08\01\10\02\18\FF\FF\FF\FF\FF\FF\FF\FF\FF\01"]} : (tensor, tensor) -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at index 0, padding_arg_index must be in [0, 2), got -1}} + "tf_device.cluster_func"(%ri_0, %ri_1) {func = @_func, padding_map = ["\08\01\10\02\18\FF\FF\FF\FF\FF\FF\FF\FF\FF\01"]} : (tensor, tensor) -> () tf_device.return } return @@ -322,8 +322,8 @@ func @_func(%arg0: tensor, %arg1: tensor) { // padding_arg_index: 2 func @bad_padding_arg_index(%arg0: tensor) { tf_device.replicate([%arg0, %arg0] as %ri_0: tensor, [%arg0, %arg0] as %ri_1: tensor) {n = 2 : i32} { - // expected-error@+1 {{'tf_device.launch_func' op bad 'padding_map' attribute at index 0, padding_arg_index must be in [0, 2), got 2}} - "tf_device.launch_func"(%ri_0, %ri_1) {device = "", func = @_func, padding_map = ["\08\01\10\02\18\02"]} : (tensor, tensor) -> () + // expected-error@+1 {{'tf_device.cluster_func' op bad 'padding_map' attribute at 
index 0, padding_arg_index must be in [0, 2), got 2}} + "tf_device.cluster_func"(%ri_0, %ri_1) {func = @_func, padding_map = ["\08\01\10\02\18\02"]} : (tensor, tensor) -> () tf_device.return } return diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir new file mode 100644 index 00000000000..eb67bdcc914 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_head_tail_outside_compilation.mlir @@ -0,0 +1,81 @@ +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-extract-head-tail-outside-compilation | FileCheck %s --dump-input-on-failure + +// Tests extraction of a outside compiled ops at head of TPU computation. + +func @single_head_outside_compilation(%arg0 : tensor) -> () { + // CHECK: tf_device.launch + // CHECK: "tf.A" + // CHECK-NEXT: tf_device.return + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.C" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> () + "tf.B"() : () -> () + "tf.C"() : () -> () + tf_device.return + }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + return +} + +// CHECK-LABEL: func @multiple_head_outside_compilation +func @multiple_head_outside_compilation(%arg0 : tensor) -> () { + // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK: %[[A_OUT:.*]] = "tf.A" + // CHECK: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]]) + // CHECK: "tf.C" + // CHECK-NEXT: tf_device.return %[[B_OUT]] + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.D"(%[[LAUNCH_OUT]]) + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + %1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + "tf.C"(%1, %arg0) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> () + "tf.D"(%1) : (tensor) -> () + tf_device.return + }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + return +} + +// CHECK-LABEL: func @test_do_not_outside_compiled_ops_in_middle +func @test_do_not_outside_compiled_ops_in_middle(%arg0 : tensor) -> () { + // CHECK-NOT: tf_device.launch + // CHECK: "tf_device.cluster" + // CHECK-NEXT: "tf.A" + // CHECK-NEXT: "tf.B" + // CHECK-NEXT: "tf.C" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {} : (tensor) -> (tensor) + %1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"}: (tensor) -> (tensor) + "tf.C"(%1) : (tensor) -> () + tf_device.return + }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + return +} + +// CHECK-LABEL: func @test_ops_with_tpu_operands_not_extracted +func @test_ops_with_tpu_operands_not_extracted(%arg0 : tensor) -> () { + // CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"() + // CHECK: %[[A_OUT:.*]] = "tf.A" + // CHECK: %[[D_OUT:.*]] = "tf.D"(%[[A_OUT]]) + // CHECK-NEXT: tf_device.return %[[D_OUT]] + // + // CHECK: "tf_device.cluster" + // CHECK: "tf.B" + // CHECK: "tf.C" + // CHECK: "tf.E" + // CHECK-NEXT: tf_device.return + "tf_device.cluster"() ( { + %0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor) -> (tensor) + %1 = "tf.B"() {} : () -> (tensor) + %2 = "tf.C"(%arg0, %1) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> (tensor) + %3 = "tf.D"(%0) {_xla_outside_compilation = "cluster1"}: (tensor) -> (tensor) + %4 = "tf.E"(%3) {} : (tensor) -> (tensor) + tf_device.return + }) {device = "tpu0", 
launch_attr = "launch_attr"} : () -> () + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir index b2e8f116827..3cb693ee571 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir @@ -3,12 +3,12 @@ // Tests that missing `_xla_outside_compilation` attribute value results in an error. func @missing_outside_compilation_attribute() -> () { - "tf_device.launch"() ( { + "tf_device.cluster"() ( { "tf.A"() : () -> () // expected-error@+1 {{attribute '_xla_outside_compilation' is empty}} "tf.B"() {_xla_outside_compilation = ""} : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -18,11 +18,11 @@ func @missing_outside_compilation_attribute() -> () { // CHECK-LABEL: func @no_outside_compilation func @no_outside_compilation() -> tensor { - %0 = "tf_device.launch"() ( { + %0 = "tf_device.cluster"() ( { %1 = "tf.A"() : () -> tensor %2 = "tf.B"(%1) : (tensor) -> tensor tf_device.return %2 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> tensor return %0 : tensor } @@ -36,16 +36,15 @@ func @nodep_single_outside_compilation() -> () { // CHECK-NEXT: "tf_device.launch" // CHECK-NEXT: "tf.B" // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf_device.launch" + // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" - // CHECK: device = "tpu0" - // CHECK-SAME: launch_attr = "launch_attr" - "tf_device.launch"() ( { + // CHECK: cluster_attr = "cluster_attr" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -59,19 +58,18 @@ func @nodep_single_cluster_multiple_ops_outside_compilation() -> () { // CHECK-NEXT: "tf.C" // CHECK-NEXT: "tf.D" // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf_device.launch" + // CHECK: "tf_device.cluster" // CHECK-NEXT: "tf.A" // CHECK-NEXT: "tf.E" - // CHECK: device = "tpu0" - // CHECK-SAME: launch_attr = "launch_attr" - "tf_device.launch"() ( { + // CHECK: cluster_attr = "cluster_attr" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.D"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.E"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () -> () return } @@ -80,15 +78,16 @@ func @nodep_single_cluster_multiple_ops_outside_compilation() -> () { // CHECK-LABEL: func @nodep_multiple_outside_compilation func @nodep_multiple_outside_compilation() -> () { // CHECK: "tf_device.parallel_execute" - // CHECK-COUNT-3: "tf_device.launch" - "tf_device.launch"() ( { + // CHECK-COUNT-2: "tf_device.launch" + // CHECK: "tf_device.cluster" + "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () "tf.C"() : () -> () "tf.D"() {_xla_outside_compilation = "cluster2"} : () -> () "tf.E"() : () -> () tf_device.return - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> () + }) {cluster_attr = "cluster_attr"} : () 
-> () return } @@ -100,17 +99,17 @@ func @single_tpu_return_single_outside_compilation(%arg0: tensor) -> tens // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[TPU_LAUNCH_OUTPUT:[0-9]*]] = "tf_device.launch" + // CHECK: %[[TPU_CLUSTER_OUTPUT:[0-9]*]] = "tf_device.cluster" // CHECK: tf_device.return - // CHECK: tf_device.return %[[TPU_LAUNCH_OUTPUT]] + // CHECK: tf_device.return %[[TPU_CLUSTER_OUTPUT]] // CHECK: tf_device.return %[[PARALLEL_EXECUTE_OUTPUT]] %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { - %2 = "tf_device.launch"() ( { + %2 = "tf_device.cluster"() ( { "tf.A"() : () -> () "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () %3 = "tf.C"() : () -> tensor tf_device.return %3 : tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor + }) {cluster_attr = "cluster_attr"} : () -> tensor tf_device.return %2 : tensor } @@ -125,17 +124,17 @@ func @multiple_tpu_return_single_outside_compilation(%arg0: tensor) -> te // CHECK: %[[REPLICATE:[0-9]*]]:4 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]]:2 = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[TPU_LAUNCH_OUTPUT:[0-9]*]]:2 = "tf_device.launch" + // CHECK: %[[TPU_CLUSTER_OUTPUT:[0-9]*]]:2 = "tf_device.cluster" // CHECK: tf_device.return - // CHECK: tf_device.return %[[TPU_LAUNCH_OUTPUT]] + // CHECK: tf_device.return %[[TPU_CLUSTER_OUTPUT]] // CHECK: tf_device.return %[[PARALLEL_EXECUTE_OUTPUT]] %1:4 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { - %2, %3 = "tf_device.launch"() ( { + %2, %3 = "tf_device.cluster"() ( { %4 = "tf.A"() : () -> tensor "tf.B"() {_xla_outside_compilation = "cluster1"} : () -> () %5 = "tf.C"() : () -> tensor tf_device.return %4, %5 : tensor, tensor - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> (tensor, tensor) + }) {cluster_attr = "cluster_attr"} : () -> (tensor, tensor) tf_device.return %2, %3 : tensor, tensor } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index 06d6c35e0a8..b8a48bbb379 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -5,7 +5,7 @@ // expected-error@+1 {{requires attribute 'tf.versions'}} module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_tf_versions() { - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -20,7 +20,7 @@ module attributes {tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_devices() { // 
expected-error@+1 {{error in fetching TPU compilation/execution devices: no TPU_SYSTEM devices found}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -30,13 +30,13 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `num_cores_per_replicas` +// Tests `tf_device.cluster_func` with missing `num_cores_per_replicas` // attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_num_cores_per_replica() { // expected-error@+1 {{requires attribute 'num_cores_per_replica'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -46,12 +46,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `num_cores_per_replicas` attribute. +// Tests `tf_device.cluster_func` with bad `num_cores_per_replicas` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_num_cores_per_replica() { // expected-error@+1 {{requires attribute 'num_cores_per_replica'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = "", step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = "", step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -61,12 +61,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `step_marker_location` attribute. +// Tests `tf_device.cluster_func` with missing `step_marker_location` attribute. 
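// For reference while reading the negative cases that follow (sketch only, not
// part of the patch): a `tf_device.cluster_func` accepted by this pass carries
// the full attribute set, as in the well-formed examples further down in this
// file:
//
//   "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func,
//       num_cores_per_replica = 1, step_marker_location = "", padding_map = [],
//       topology = "", device_assignment = [], input_sharding_configuration = [],
//       output_sharding_configuration = []} : () -> ()
//
// Each case below drops, mistypes, or makes unparsable one of these attributes
// and expects the corresponding error from the pass.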
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_num_cores_per_replica() { // expected-error@+1 {{requires attribute 'step_marker_location'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -76,12 +76,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `step_marker_location` attribute. +// Tests `tf_device.cluster_func` with bad `step_marker_location` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_step_marker_location() { // expected-error@+1 {{requires attribute 'step_marker_location'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = 1, padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = 1, padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -91,12 +91,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with unparsable `step_marker_location` attribute. +// Tests `tf_device.cluster_func` with unparsable `step_marker_location` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @unparsable_step_marker_location() { // expected-error@+1 {{bad 'step_marker_location' attribute with value 'test'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "test", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "test", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -106,12 +106,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `padding_map` attribute. +// Tests `tf_device.cluster_func` with missing `padding_map` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_padding_map() { // expected-error@+1 {{requires attribute 'padding_map'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -121,12 +121,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `padding_map` attribute. +// Tests `tf_device.cluster_func` with bad `padding_map` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_padding_map() { // expected-error@+1 {{requires attribute 'padding_map'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = "", topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = "", topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -136,12 +136,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad element in `padding_map` attribute. +// Tests `tf_device.cluster_func` with bad element in `padding_map` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_element_padding_map() { // expected-error@+1 {{bad 'padding_map' attribute at index 0, not a string}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [1], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [1], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -151,12 +151,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with unparsable element in `padding_map` attribute. +// Tests `tf_device.cluster_func` with unparsable element in `padding_map` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @unparsable_element_padding_map() { // expected-error@+1 {{bad 'padding_map' attribute at index 0 with value 'test': failed to parse to tpu::PaddingMap}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["test"], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["test"], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -166,12 +166,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `topology` attribute. +// Tests `tf_device.cluster_func` with missing `topology` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_topology() { // expected-error@+1 {{requires attribute 'topology'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -181,12 +181,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `topology` attribute. +// Tests `tf_device.cluster_func` with bad `topology` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_topology() { // expected-error@+1 {{requires attribute 'topology'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = 1 : i32, device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = 1 : i32, device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -196,12 +196,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with `topology` attribute resulting in device assignment error. +// Tests `tf_device.cluster_func` with `topology` attribute resulting in device assignment error. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @invalid_topology() { // expected-error@+1 {{error in fetching TPU compilation/execution devices}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "test", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "test", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -211,12 +211,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `device_assignment` attribute. +// Tests `tf_device.cluster_func` with missing `device_assignment` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_device_assignment() { // expected-error@+1 {{requires attribute 'device_assignment'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -226,12 +226,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `device_assignment` attribute. +// Tests `tf_device.cluster_func` with bad `device_assignment` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_device_assignment() { // expected-error@+1 {{requires attribute 'device_assignment'}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = "", input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = "", input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -241,12 +241,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad element in `device_assignment` attribute. +// Tests `tf_device.cluster_func` with bad element in `device_assignment` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_element_device_assignment() { // expected-error@+1 {{bad 'device_assignment' attribute at index 0, not an int}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [""], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [""], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -277,12 +277,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with `device_assignment` attribute resulting in device assignment error. +// Tests `tf_device.cluster_func` with `device_assignment` attribute resulting in device assignment error. 
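// Note on the `topology` byte string used in the next case (decoded here for
// reference, not part of the patch; assumes the standard
// tensorflow.tpu.TopologyProto field numbering):
//
//   "\0A\03\01\01\02\10\01\18\02\22\06\00\00\00\00\00\01"
//     ->  mesh_shape: [1, 1, 2]
//         num_tasks: 1
//         num_tpu_devices_per_task: 2
//         device_coordinates: [0, 0, 0,  0, 0, 1]
//
// i.e. a topology describing two TPU devices on one task, while `tf.devices`
// lists only a single TPU device, which plausibly explains the expected device
// assignment error.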
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @invalid_device_assignment() { // expected-error@+1 {{error in fetching TPU compilation/execution devices}} - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "\0A\03\01\01\02\10\01\18\02\22\06\00\00\00\00\00\01", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "\0A\03\01\01\02\10\01\18\02\22\06\00\00\00\00\00\01", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () return } func @empty_func() { @@ -292,12 +292,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `input_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with missing `input_sharding_configuration` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_input_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{requires attribute 'input_sharding_configuration'}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], output_sharding_configuration = []} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], output_sharding_configuration = []} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -317,12 +317,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `input_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with bad `input_sharding_configuration` attribute. 
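// Note on the sharding byte string used in the remaining cases (decoded here
// for reference, not part of the patch; assumes the standard xla.OpSharding
// field numbering):
//
//   "\08\01\1A\01\01\22\01\00"
//     ->  type: MAXIMAL
//         tile_assignment_dimensions: [1]
//         tile_assignment_devices: [0]
//
// i.e. a maximal sharding assigning the value to logical device 0, used as the
// well-formed entry in the input/output_sharding_configuration attributes of
// these cases.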
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_input_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{requires attribute 'input_sharding_configuration'}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = "", output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = "", output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -332,12 +332,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with mismatched `input_sharding_configuration` attribute size. +// Tests `tf_device.cluster_func` with mismatched `input_sharding_configuration` attribute size. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @mismatched_size_input_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'input_sharding_configuration' attribute, expected array attribute of size 1, got size 0}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -347,12 +347,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with unsupported operand type. +// Tests `tf_device.cluster_func` with unsupported operand type. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @unsupported_operand_type(%arg0: tensor) { // expected-error@+1 {{failed to determine operand type at index 0: Converting i2 to DataType}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -362,12 +362,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad element in `input_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with bad element in `input_sharding_configuration` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_element_input_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'input_sharding_configuration' attribute at index 0, not a string}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [1], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [1], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -377,12 +377,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with unparsable element in `input_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with unparsable element in `input_sharding_configuration` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @unparsable_element_input_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'input_sharding_configuration' attribute at index 0 with value 'test': failed to parse to xla::OpSharding}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["test"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["test"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -392,12 +392,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with missing `output_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with missing `output_sharding_configuration` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @missing_output_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{requires attribute 'output_sharding_configuration'}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_ENTRY", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -407,12 +407,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad `output_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with bad `output_sharding_configuration` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_output_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{requires attribute 'output_sharding_configuration'}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ""} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ""} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -422,12 +422,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with mismatched `output_sharding_configuration` attribute size. +// Tests `tf_device.cluster_func` with mismatched `output_sharding_configuration` attribute size. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @mismatched_size_output_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'output_sharding_configuration' attribute, expected array attribute of size 1, got size 0}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = []} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = []} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -438,12 +438,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with bad element in `output_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with bad element in `output_sharding_configuration` attribute. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @bad_element_output_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'output_sharding_configuration' attribute at index 0, not a string}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = [1]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = [1]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -453,12 +453,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with unparsable element in `output_sharding_configuration` attribute. +// Tests `tf_device.cluster_func` with unparsable element in `output_sharding_configuration` attribute. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { func @unparsable_element_output_sharding_configuration(%arg0: tensor) { // expected-error@+1 {{bad 'output_sharding_configuration' attribute at index 0 with value 'test': failed to parse to xla::OpSharding}} - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["test"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["test"]} : (tensor) -> tensor return } func @empty_func(%arg0: tensor) -> tensor { @@ -468,7 +468,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests `tf_device.launch_func` with empty `step_marker_location` attribute +// Tests `tf_device.cluster_func` with empty `step_marker_location` attribute // defaults to `STEP_MARK_AT_ENTRY`. 
// // The expected TPUCompileMetadataProto is: @@ -478,7 +478,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @default_step_marker_location func @default_step_marker_location() { - "tf_device.launch_func"() {_tpu_replicate = "cluster0", device = "", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () + "tf_device.cluster_func"() {_tpu_replicate = "cluster0", func = @empty_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = [], output_sharding_configuration = []} : () -> () // CHECK: metadata // CHECK-SAME: num_replicas: 1 // CHECK-SAME: num_cores_per_replica: 1 @@ -497,7 +497,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @unranked_shape_arg func @unranked_shape_arg(%arg0: tensor<*xi32>) -> tensor<*xi32> { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*xi32>) -> tensor<*xi32> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*xi32>) -> tensor<*xi32> // CHECK: metadata // CHECK-SAME: shape {\0A unknown_rank: true @@ -515,7 +515,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @partial_shape_arg func @partial_shape_arg(%arg0: tensor) -> tensor { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: metadata // CHECK-SAME: args // CHECK-SAME: shape {\0A dim {\0A size: -1\0A }\0A dim {\0A size: -1\0A }\0A dim {\0A size: 3\0A }\0A } @@ 
-546,7 +546,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @static_shape_arg func @static_shape_arg(%arg0: tensor<1x2x3xi32>) -> tensor<1x2x3xi32> { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<1x2x3xi32>) -> tensor<1x2x3xi32> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<1x2x3xi32>) -> tensor<1x2x3xi32> // CHECK: metadata // CHECK-SAME: args // CHECK-SAME: shape @@ -571,7 +571,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @resource_arg func @resource_arg(%arg0: tensor<*x!tf.resource>) -> tensor<*x!tf.resource> { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*x!tf.resource>) -> tensor<*x!tf.resource> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*x!tf.resource>) -> tensor<*x!tf.resource> // CHECK: metadata // CHECK: dtype: DT_RESOURCE // CHECK-SAME: kind: VARIABLE @@ -590,7 +590,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @parameter_arg func @parameter_arg(%arg0: tensor<*xf32>) -> tensor<*xf32> { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*xf32>) -> tensor<*xf32> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = 
["\08\01\1A\01\01\22\01\00"]} : (tensor<*xf32>) -> tensor<*xf32> // CHECK: metadata // CHECK: dtype: DT_FLOAT // CHECK-SAME: kind: PARAMETER @@ -614,7 +614,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests metadata is populated correctly based on launch_func op and attributes. +// Tests metadata is populated correctly based on cluster_func op and attributes. // // The expected TPUCompileMetadataProto is: // args { @@ -650,7 +650,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @metadata func @metadata(%arg0: tensor<8xi32>) -> tensor<8xi32> { - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> // CHECK: metadata // CHECK-SAME: args // CHECK-SAME: dtype: DT_INT32 @@ -694,7 +694,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NOT: "tf.Shape"(%[[ARG_3]]) // CHECK: %[[ARG_0_SHAPE:[0-9]*]] = "tf.Shape"(%[[ARG_0]]) // CHECK: %[[ARG_2_SHAPE:[0-9]*]] = "tf.Shape"(%[[ARG_2]]) - %0 = "tf_device.launch_func"(%arg0, %arg1, %arg2, %arg3) {_tpu_replicate = "cluster0", device = "", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*xi32>, tensor<8xi32>, tensor<*xi32>, tensor<8xi32>) -> tensor<8xi32> + %0 = "tf_device.cluster_func"(%arg0, %arg1, %arg2, %arg3) {_tpu_replicate = "cluster0", func = @_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<*xi32>, tensor<8xi32>, tensor<*xi32>, tensor<8xi32>) -> tensor<8xi32> // CHECK: "tf._TPUCompileMlir"(%[[ARG_0_SHAPE]], %[[ARG_2_SHAPE]]) return %0: tensor<8xi32> @@ -706,16 +706,16 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests simple case of `tf_device.launch_func` on TPU with single input and +// Tests simple case of `tf_device.cluster_func` on TPU with single input and // single output. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { - // CHECK-LABEL: func @single_tpu_launch_func - func @single_tpu_launch_func(%arg0: tensor) -> tensor { + // CHECK-LABEL: func @single_tpu_cluster_func + func @single_tpu_cluster_func(%arg0: tensor) -> tensor { %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -747,12 +747,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests simple case of `tf_device.launch_func` on TPU with replication. +// Tests simple case of `tf_device.cluster_func` on TPU with replication. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0", "/job:worker/replica:0/task:0/device:TPU:1"]} { - // CHECK-LABEL: func @replicated_tpu_launch_func + // CHECK-LABEL: func @replicated_tpu_cluster_func // CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) - func @replicated_tpu_launch_func(%arg0: tensor) -> tensor { + func @replicated_tpu_cluster_func(%arg0: tensor) -> tensor { // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" %0 = "tf.A"(%arg0) : (tensor) -> tensor @@ -775,7 +775,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0" // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute"(%[[RI_0]], %[[COMPILE_OUTPUT]]#1) - %2 = "tf_device.launch_func"(%ri_0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %2 = "tf_device.cluster_func"(%ri_0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: tf_device.return %[[EXECUTE_OUTPUT]] tf_device.return %2 : tensor @@ -796,15 +796,15 @@ 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests that launch_func without _tpu_replicate attribute is ignored. +// Tests that cluster_func without _tpu_replicate attribute is ignored. module attributes {tf.versions = {producer = 888 : i32}} { - // CHECK-LABEL: func @single_gpu_launch_func - func @single_gpu_launch_func(%arg0: tensor) -> tensor { + // CHECK-LABEL: func @single_gpu_cluster_func + func @single_gpu_cluster_func(%arg0: tensor) -> tensor { %0 = "tf.A"(%arg0) : (tensor) -> tensor - %1 = "tf_device.launch_func"(%0) {device = "gpu0", func = @gpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor - // CHECK: tf_device.launch_func + %1 = "tf_device.cluster_func"(%0) {device = "gpu0", func = @gpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + // CHECK: tf_device.cluster_func // CHECK-SAME: device = "gpu0" // CHECK-SAME: func = @gpu0_func // CHECK-SAME: num_cores_per_replica = 1 @@ -823,7 +823,7 @@ module attributes {tf.versions = {producer = 888 : i32}} { // ----- -// Tests of `tf_device.launch_func` on TPU with nested function calls. +// Tests of `tf_device.cluster_func` on TPU with nested function calls. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { // CHECK-LABEL: func @with_nested_func @@ -831,7 +831,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -871,7 +871,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests of `tf_device.launch_func` on TPU with referenced function that's not +// Tests of `tf_device.cluster_func` on TPU with referenced function that's not // via a standard call op. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { @@ -880,7 +880,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -916,7 +916,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests rewriting `tf_device.launch_func` on TPU with a chain of referenced +// Tests rewriting `tf_device.cluster_func` on TPU with a chain of referenced // functions. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { @@ -925,7 +925,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -969,7 +969,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests rewriting `tf_device.launch_func` on TPU with multiple calls to same +// Tests rewriting `tf_device.cluster_func` on TPU with multiple calls to same // function. 
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { @@ -978,7 +978,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -1017,15 +1017,15 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests multiple `tf_device.launch_func` on TPU with different computation. +// Tests multiple `tf_device.cluster_func` on TPU with different computation. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { - // CHECK-LABEL: func @multiple_launch_different_func - func @multiple_launch_different_func(%arg0: tensor) -> tensor { + // CHECK-LABEL: func @multiple_cluster_different_func + func @multiple_cluster_different_func(%arg0: tensor) -> tensor { %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func0, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func0, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -1039,7 +1039,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[EXECUTE0_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute"(%[[A_OUTPUT]], %[[COMPILE0_OUTPUT]]#1) - %2 = "tf_device.launch_func"(%1) {_tpu_replicate = "cluster1", device = "", func = @tpu0_func1, num_cores_per_replica = 1, step_marker_location = 
"STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %2 = "tf_device.cluster_func"(%1) {_tpu_replicate = "cluster1", func = @tpu0_func1, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[EXECUTE0_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[EXECUTE0_OUTPUT]]) // CHECK: %[[COMPILE1_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[EXECUTE0_SHAPE_OUTPUT]]) @@ -1073,15 +1073,15 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- -// Tests multiple `tf_device.launch_func` on TPU with same computation. +// Tests multiple `tf_device.cluster_func` on TPU with same computation. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { - // CHECK-LABEL: func @multiple_launch_same_func - func @multiple_launch_same_func(%arg0: tensor) -> tensor { + // CHECK-LABEL: func @multiple_cluster_same_func + func @multiple_cluster_same_func(%arg0: tensor) -> tensor { %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE0_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -1095,7 +1095,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[EXECUTE0_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute"(%[[A_OUTPUT]], %[[COMPILE0_OUTPUT]]#1) - %2 = "tf_device.launch_func"(%1) {_tpu_replicate = "cluster1", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %2 = "tf_device.cluster_func"(%1) {_tpu_replicate = "cluster1", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = 
["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[EXECUTE0_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[EXECUTE0_OUTPUT]]) // CHECK: %[[COMPILE1_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[EXECUTE0_SHAPE_OUTPUT]]) @@ -1128,12 +1128,12 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ArrayAttr and DictionaryAttr. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} { - // CHECK-LABEL: func @single_tpu_launch_func - func @single_tpu_launch_func(%arg0: tensor) -> tensor { + // CHECK-LABEL: func @single_tpu_cluster_func + func @single_tpu_cluster_func(%arg0: tensor) -> tensor { %0 = "tf.A"(%arg0) : (tensor) -> tensor // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor // CHECK: %[[A_SHAPE_OUTPUT:[0-9]*]] = "tf.Shape"(%[[A_OUTPUT]]) // CHECK: %[[COMPILE_OUTPUT:[0-9]*]]:2 = "tf_device.launch" // CHECK-NEXT: "tf._TPUCompileMlir"(%[[A_SHAPE_OUTPUT]]) @@ -1203,7 +1203,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK-NEXT: "tf.TPUCompileSucceededAssert" // CHECK: %[[EXECUTE_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute" - %1 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + %1 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor %compile_result = "tf.TPUCompilationResult"() {_tpu_replicate = "cluster0"} : () -> tensor %compile_result2 = "tf.TPUCompilationResult"() {_tpu_replicate = "cluster0"} : () -> tensor @@ -1222,6 +1222,41 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // ----- +// Tests simple case of `tf_device.cluster_func` on TPU with replication and parallel_execute. 
+ +module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0", "/job:worker/replica:0/task:0/device:TPU:1"]} { + // CHECK-LABEL: func @replicated_parallel_tpu_cluster_func + func @replicated_parallel_tpu_cluster_func(%arg0: tensor) -> tensor { + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + %0 = "tf.A"(%arg0) : (tensor) -> tensor + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + // CHECK: "tf._TPUCompileMlir" + // CHECK: "tf.TPUCompileSucceededAssert" + // CHECK: "tf_device.parallel_execute" + // CHECK: "tf.TPUExecute" + %3 = "tf_device.parallel_execute"() ( { + "tf.D"() : () -> () + tf_device.return + }, { + %4 = "tf_device.cluster_func"(%ri_0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor) -> tensor + + tf_device.return %4 : tensor + }) : () -> (tensor) + tf_device.return %3 : tensor + } + %2 = "tf.C"(%1#1) : (tensor) -> tensor + return %2 : tensor + } + + func @tpu0_func(%arg0: tensor) -> tensor { + %0 = "tf.B"(%arg0) : (tensor) -> tensor + return %0 : tensor + } +} + +// ----- + // Tests devices are set properly for non replicated model parallelism. module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:0/device:TPU:0", "/job:localhost/replica:0/task:0/device:TPU:1", "/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0"]} { @@ -1244,7 +1279,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return // CHECK-NEXT: device = "/job:localhost/replica:0/task:0/device:TPU:1" - %0 = "tf_device.launch_func"(%arg0) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\01\01\02\10\01\18\02\22\08\00\00\00\00\00\00\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> + %0 = "tf_device.cluster_func"(%arg0) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\01\01\02\10\01\18\02\22\08\00\00\00\00\00\00\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> return %0 : tensor<8xi32> } func @tpu0_func(%arg0: tensor<8xi32>) -> tensor<8xi32> { @@ -1309,7 +1344,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-NEXT: "tf.TPUExecute" // CHECK-NEXT: tf_device.return // CHECK-NEXT: device = "TPU_REPLICATED_CORE_1" - %1 = "tf_device.launch_func"(%ri) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location 
= "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> + %1 = "tf_device.cluster_func"(%ri) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } return %0#0, %0#1 : tensor<8xi32>, tensor<8xi32> @@ -1344,7 +1379,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute"(%[[RI_1]], %[[RI_2]], %[[COMPILE]]#2) // CHECK: device = "TPU_REPLICATED_CORE_1" - %1 = "tf_device.launch_func"(%ri, %ri2, %ri3) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "", ""], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>, tensor<*xi1>, tensor<*xi32>) -> tensor<8xi32> + %1 = "tf_device.cluster_func"(%ri, %ri2, %ri3) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "", ""], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>, tensor<*xi1>, tensor<*xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } return %0#0, %0#1 : tensor<8xi32>, tensor<8xi32> @@ -1382,7 +1417,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: "tf_device.launch" // CHECK-NEXT: "tf.TPUExecute" // CHECK: device = "TPU_REPLICATED_CORE_1" - %1 = "tf_device.launch_func"(%ri) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> + %1 = "tf_device.cluster_func"(%ri) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = 
"\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"]} : (tensor<8xi32>) -> tensor<8xi32> tf_device.return %1 : tensor<8xi32> } return %0#0, %0#1 : tensor<8xi32>, tensor<8xi32> @@ -1420,7 +1455,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute" // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] // CHECK: device = "TPU_REPLICATED_CORE_1" - %1, %2 = "tf_device.launch_func"(%ri) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<8xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<8xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1487,7 +1522,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK-NEXT: %[[EXECUTE_1_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[SPLIT_OUT]]#1, %[[RI_1]], %[[COMPILE]]#2) // CHECK-NEXT: tf_device.return %[[EXECUTE_1_OUTPUT]] // CHECK: device = "TPU_REPLICATED_CORE_1" - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : 
tensor<*xi32>, tensor<*xi1> @@ -1555,7 +1590,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[CONST_CONCAT_DIM:[0-9]*]] = "tf.Const"() // CHECK: %[[CONCAT_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT_DIM]], %[[PARALLEL_EXECUTE_OUTPUT]]#0, %[[PARALLEL_EXECUTE_OUTPUT]]#2 - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\1A\02\01\02\22\02\00\01", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1598,7 +1633,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc func @uneven_input_sharding_disallowed(%arg0: tensor<128x10xf32>, %arg1: tensor<128x10xf32>, %arg2: tensor<*xi32>, %arg3: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) { %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { // expected-error@+1 {{incorrect input sharding configuration received. 
1-th dimension of the input must be evenly divisible by 4}} - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\01\04\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\01\04\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\01\04\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\01\04\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1638,7 +1673,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc func @uneven_output_sharding_disallowed(%arg0: tensor<128x10xf32>, %arg1: tensor<128x10xf32>, %arg2: tensor<*xi32>, %arg3: tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) { %0:2, %1:2 = tf_device.replicate([%arg0, %arg1] as %ri_1: tensor<128x10xf32>, [%arg2, %arg3] as %ri_2: tensor<*xi32>) {n = 2 : i32} { // expected-error@+1 {{incorrect sharding format for outputs. 
Number of tiled outputs(4) must match the number of logical devices(2)}} - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["", ""], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\01\04\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\01\04\22\04\00\01\02\03", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 2, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = [""], topology = "\0A\04\01\02\01\02\10\02\18\02\22\10\00\00\00\00\00\00\00\01\00\01\00\00\00\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0], input_sharding_configuration = ["", ""], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\01\04\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\01\04\22\04\00\01\02\03", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1744,7 +1779,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[LAUNCH_3_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: %[[EXECUTE_3_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[SPLIT_2_OUT]]#1, %[[COMPILE]]#4) // CHECK: tf_device.return %[[EXECUTE_3_OUTPUT]] - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1851,7 +1886,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: 
%[[LAUNCH_3_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: %[[EXECUTE_3_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[SPLIT_2_OUT]]#1, %[[COMPILE]]#4) // CHECK: tf_device.return %[[EXECUTE_3_OUTPUT]] - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -1935,7 +1970,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[CONCAT2_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT2_DIM]], %[[PARALLEL_EXECUTE_OUTPUT]]#3, %[[PARALLEL_EXECUTE_OUTPUT]]#4 // CHECK: %[[CONST_CONCAT3_DIM:[0-9]*]] = "tf.Const"() // CHECK: %[[CONCAT3_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT3_DIM]], %[[CONCAT_OUTPUT]], %[[CONCAT2_OUTPUT]] - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "", padding_map = [""], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "", padding_map = [""], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = 
["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\00\01\02\03", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -2020,7 +2055,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[LAUNCH_3_OUTPUT:[0-9]*]] = "tf_device.launch" // CHECK-NEXT: %[[EXECUTE_3_OUTPUT:[0-9]*]] = "tf.TPUExecute"(%[[SPLIT_1_OUT]]#0, %[[COMPILE]]#4) // CHECK: tf_device.return %[[EXECUTE_3_OUTPUT]] - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\03\02\01\00", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\03\02\01\00", "\08\01\1A\01\01\22\01\01"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00", ""]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> @@ -2104,7 +2139,7 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:loc // CHECK: %[[CONCAT2_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT2_DIM]], %[[PARALLEL_EXECUTE_OUTPUT]]#2, %[[PARALLEL_EXECUTE_OUTPUT]]#0 // CHECK: %[[CONST_CONCAT3_DIM:[0-9]*]] = "tf.Const"() // CHECK: %[[CONCAT3_OUTPUT:[0-9]*]] = "tf.Concat"(%[[CONST_CONCAT3_DIM]], %[[CONCAT_OUTPUT]], %[[CONCAT2_OUTPUT]] - %1, %2 = "tf_device.launch_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", device = "", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "", padding_map = [""], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\03\02\01\00", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> 
(tensor<*xi32>, tensor<*xi1>) + %1, %2 = "tf_device.cluster_func"(%ri_1, %ri_2) {_tpu_replicate = "cluster0", func = @tpu0_func, num_cores_per_replica = 4, step_marker_location = "", padding_map = [""], topology = "\0A\04\02\02\01\02\10\01\18\08\22 \00\00\00\00\00\00\00\01\01\00\00\00\01\00\00\01\00\01\00\00\00\01\00\01\01\01\00\00\01\01\00\01", device_assignment = [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00", "\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\03\12\12\10\0b\1a\02\02\02\2a\06\0a\02\01\00\20\01\32\02\00\00\1a\02\02\02\22\04\03\02\01\00", "\08\01\1A\01\01\22\01\00"]} : (tensor<128x10xf32>, tensor<*xi32>) -> (tensor<*xi32>, tensor<*xi1>) tf_device.return %1, %2 : tensor<*xi32>, tensor<*xi1> } return %0#0, %1#0 : tensor<*xi32>, tensor<*xi1> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_sharding_identification.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_sharding_identification.mlir index 2c49c2060f1..fff1240a121 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_sharding_identification.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_sharding_identification.mlir @@ -1,10 +1,10 @@ // RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-sharding-identification | FileCheck %s --dump-input=fail -// Tests empty launch func. Empty input/output sharding configuration +// Tests empty cluster func. Empty input/output sharding configuration // attributes must be added. -// CHECK-LABEL: func @check_sharding_attrs_exists_for_empty_launch_func -func @check_sharding_attrs_exists_for_empty_launch_func() { - "tf_device.launch_func"() {device = "", func = @empty_func, step_marker_location = ""} : () -> () +// CHECK-LABEL: func @check_sharding_attrs_exists_for_empty_cluster_func +func @check_sharding_attrs_exists_for_empty_cluster_func() { + "tf_device.cluster_func"() {func = @empty_func, step_marker_location = ""} : () -> () // CHECK: input_sharding_configuration = [] // CHECK: output_sharding_configuration = [] return @@ -21,7 +21,7 @@ func @empty_func() { // gets default maximal(0) sharding configuration. // CHECK-LABEL: func @check_default_sharding_for_block_arg_inputs_outputs func @check_default_sharding_for_block_arg_inputs_outputs(%arg0: tensor<*xi32>) { - "tf_device.launch_func"(%arg0) {device = "", func = @func_without_sharding, step_marker_location = ""} : (tensor<*xi32>) -> () + "tf_device.cluster_func"(%arg0) {func = @func_without_sharding, step_marker_location = ""} : (tensor<*xi32>) -> () // CHECK: input_sharding_configuration // CHECK-SAME: ["\08\01\1A\01\01\22\01\00"] // CHECK: output_sharding_configuration @@ -42,7 +42,7 @@ func @func_without_sharding(%arg0: tensor<*xi32>) -> tensor<*xi32> { // default maximal(0) sharding configuration. // CHECK-LABEL: func @check_default_sharding_for_inputs_outputs func @check_default_sharding_for_inputs_outputs(%arg0: tensor<*xi32>) { - "tf_device.launch_func"(%arg0) {device = "", func = @func_without_sharding, step_marker_location = ""} : (tensor<*xi32>) -> () + "tf_device.cluster_func"(%arg0) {func = @func_without_sharding, step_marker_location = ""} : (tensor<*xi32>) -> () // CHECK: input_sharding_configuration // CHECK-SAME: ["\08\01\1A\01\01\22\01\00"] // CHECK: output_sharding_configuration @@ -63,7 +63,7 @@ func @func_without_sharding(%arg0: tensor<*xi32>) -> tensor<*xi32> { // Tests with a input arg connected to XlaSharding op. 
// CHECK-LABEL: func @check_sharding_for_input_correctly_identified func @check_sharding_for_input_correctly_identified(%arg0: tensor<*xi32>) { - "tf_device.launch_func"(%arg0) {device = "", func = @inputs_with_sharding_func, step_marker_location = ""} : (tensor<*xi32>) -> () + "tf_device.cluster_func"(%arg0) {func = @inputs_with_sharding_func, step_marker_location = ""} : (tensor<*xi32>) -> () // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03"] // CHECK: output_sharding_configuration @@ -85,7 +85,7 @@ func @inputs_with_sharding_func(%arg0: tensor<*xi32>) -> tensor<*xi32> { // Tests with sharding is correctly parsed for multiple inputs/outputs. // CHECK-LABEL: func @check_sharding_for_multiple_inputs_outputs func @check_sharding_for_multiple_inputs_outputs(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) { - "tf_device.launch_func"(%arg0, %arg1) {device = "", func = @func_with_sharding, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) + "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_sharding, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03", "\04\05\06"] // CHECK: output_sharding_configuration @@ -110,7 +110,7 @@ func @func_with_sharding(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) -> (tensor<* // Tests with input sharding following an identity op. // CHECK-LABEL: func @check_sharding_after_identity func @check_sharding_after_identity(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) { - "tf_device.launch_func"(%arg0, %arg1) {device = "", func = @func_with_sharding_after_identity, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) + "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_sharding_after_identity, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03", "\04\05\06"] // CHECK: output_sharding_configuration @@ -136,7 +136,7 @@ func @func_with_sharding_after_identity(%arg0: tensor<*xi32>, %arg1: tensor<*xi1 // Tests with input sharding following a ReadVariable op. 
// CHECK-LABEL: func @check_sharding_after_read_variable func @check_sharding_after_read_variable(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) { - "tf_device.launch_func"(%arg0, %arg1) {device = "", func = @func_with_sharding_after_read_variable, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) + "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_sharding_after_read_variable, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03", "\04\05\06"] // CHECK: output_sharding_configuration @@ -164,7 +164,7 @@ func @func_with_sharding_after_read_variable(%arg0: tensor<*x!tf.resource, %arg1: tensor<*xi1>) { - "tf_device.launch_func"(%arg0, %arg1) {device = "", func = @func_with_sharding_after_cast, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) + "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_sharding_after_cast, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03", "\04\05\06"] // CHECK: output_sharding_configuration @@ -191,7 +191,7 @@ func @func_with_sharding_after_cast(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) - // Tests that input sharding inside a functional op is parsed correctly. // CHECK-LABEL: func @check_sharding_inside_functional_op func @check_sharding_inside_functional_op(%arg0: tensor<*xi32>, %arg1: tensor<*xi1>) { - "tf_device.launch_func"(%arg0, %arg1) {device = "", func = @func_with_device_training_loop, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) + "tf_device.cluster_func"(%arg0, %arg1) {func = @func_with_device_training_loop, step_marker_location = ""} : (tensor<*xi32>, tensor<*xi1>) -> (tensor<*xi32>, tensor<*xi1>) // CHECK: input_sharding_configuration // CHECK-SAME: ["\01\02\03", "\04\05\06"] // CHECK: output_sharding_configuration diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/annotate_parameter_replication.cc b/tensorflow/compiler/mlir/tensorflow/transforms/annotate_parameter_replication.cc index 01c30eabd35..fb3ecfde771 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/annotate_parameter_replication.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/annotate_parameter_replication.cc @@ -36,7 +36,7 @@ namespace { constexpr char kReplicationAttr[] = "tf_device.is_same_data_across_replicas"; constexpr char kMirroredVariableIndicesAttr[] = "_mirrored_variable_indices"; -// Analyzes the inputs to LaunchFuncOps in the module, and annotates their +// Analyzes the inputs to ClusterFuncOps in the module, and annotates their // invoked functions whether each input has the same data across replicas. 
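The annotation decision described above is simple bookkeeping: an operand of the outlined cluster function carries the same data across replicas either when it is captured from outside the `tf_device.replicate` region or when it is a replicate block argument whose index appears in `_mirrored_variable_indices`. The standalone C++ sketch below models only that decision with plain containers; it is not the MLIR pass itself, and all names and data in it are illustrative.

    // Minimal standalone model of the annotation decision (illustrative only,
    // not the MLIR pass). For each cluster_func operand we record the index of
    // the tf_device.replicate block argument it traces back to, or -1 when the
    // value is captured from outside the replicate region.
    #include <iostream>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> operand_to_replicate_arg = {0, 1, -1, 3};
      // Indices taken from the replicate's _mirrored_variable_indices attribute.
      std::set<int> mirrored_indices = {0, 3};

      for (int i = 0; i < (int)operand_to_replicate_arg.size(); ++i) {
        int arg = operand_to_replicate_arg[i];
        // Captured values are identical for every replica; per-replica block
        // arguments are identical only if they are listed as mirrored variables.
        bool same_across_replicas = arg < 0 || mirrored_indices.count(arg) > 0;
        std::cout << "callee argument " << i
                  << " tf_device.is_same_data_across_replicas="
                  << (same_across_replicas ? "true" : "false") << "\n";
      }
    }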
struct AnnotateParameterReplication : public PassWrapper(); + m.walk([&](tf_device::ClusterFuncOp cluster_func) { + auto replicate = cluster_func.getParentOfType(); if (!replicate) return; auto mirrored_variable_indices_attr = replicate.getAttrOfType(kMirroredVariableIndicesAttr); @@ -69,8 +69,8 @@ void AnnotateParameterReplication::runOnOperation() { mirrored_index.cast().getInt()); } } - auto func = llvm::cast(m.lookupSymbol(launch_func.func())); - for (auto entry : llvm::enumerate(launch_func.getOperands())) { + auto func = llvm::cast(m.lookupSymbol(cluster_func.func())); + for (auto entry : llvm::enumerate(cluster_func.getOperands())) { auto operand = SkipIdentityAndReadVariable(entry.value()); auto block_arg = operand.dyn_cast(); if (block_arg && block_arg.getOwner() == &replicate.GetBody()) { @@ -98,7 +98,7 @@ CreateAnnotateParameterReplicationPass() { static PassRegistration pass( "tf-annotate-parameter-replication", - "Annotate whether a LaunchFuncOp's parameters have the same data across " + "Annotate whether a ClusterFuncOp's parameters have the same data across " "replicas."); } // namespace TFDevice diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index fc1622b93e9..a01769bc395 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -30,9 +30,10 @@ namespace { void EnableLogging(PassManager *pm) { // Print the whole module after each pass, which requires disabling // multi-threading as well. - pm->disableMultithreading(); + pm->getContext()->disableMultithreading(); pm->enableIRPrinting(std::make_unique( /*print_module_scope=*/true)); + pm->enableTiming(std::make_unique()); } } // namespace diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc index aa4c071abdf..886bd5b5b65 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// This pass outlines regions of `tf_device.launch` into functions and replaces -// `tf_device.launch` with equivalent `tf_device.launch_func` operations. +// This pass outlines regions of `tf_device.cluster` into functions and replaces +// `tf_device.cluster` with equivalent `tf_device.cluster_func` operations. 
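As a rough illustration of the outlining step (a sketch over a toy IR, not the MLIR implementation), the program below collects the live-ins of a "cluster" region, i.e. values used inside the region but defined outside it, emits a standalone function whose parameters are those live-ins, and leaves behind a single call-like op that refers to the function through a `func` attribute, mirroring how `tf_device.cluster_func` refers to the outlined `FuncOp`. The `_func` name echoes the placeholder name the pass now uses; everything else here is made up for the example.

    // Toy-IR sketch of cluster outlining (illustrative only). Values are plain
    // strings; an Op has a result name, an op name, and operand names. Live-ins
    // of the region are the operands not produced by any op inside the region.
    #include <cstddef>
    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    struct Op {
      std::string result;
      std::string name;
      std::vector<std::string> operands;
    };

    int main() {
      // Body of a hypothetical tf_device.cluster; %a and %b are defined above it.
      std::vector<Op> cluster_body = {
          {"%0", "tf.AddV2", {"%a", "%b"}},
          {"%1", "tf.Mul", {"%0", "%a"}},
      };

      // Collect live-ins: operands used in the region but not defined in it.
      std::set<std::string> defined, live_ins;
      for (const Op& op : cluster_body) defined.insert(op.result);
      std::vector<std::string> live_in_order;
      for (const Op& op : cluster_body)
        for (const std::string& operand : op.operands)
          if (!defined.count(operand) && live_ins.insert(operand).second)
            live_in_order.push_back(operand);

      // "Outline" the body into a function whose arguments are the live-ins.
      std::cout << "func @_func(";
      for (std::size_t i = 0; i < live_in_order.size(); ++i)
        std::cout << (i ? ", " : "") << live_in_order[i];
      std::cout << ") {\n";
      for (const Op& op : cluster_body) {
        std::cout << "  " << op.result << " = \"" << op.name << "\"(";
        for (std::size_t i = 0; i < op.operands.size(); ++i)
          std::cout << (i ? ", " : "") << op.operands[i];
        std::cout << ")\n";
      }
      std::cout << "  return %1\n}\n";

      // The cluster is replaced by a call-like op carrying a func attribute.
      std::cout << "%r = \"tf_device.cluster_func\"(";
      for (std::size_t i = 0; i < live_in_order.size(); ++i)
        std::cout << (i ? ", " : "") << live_in_order[i];
      std::cout << ") {func = @_func}\n";
    }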
#include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project @@ -35,7 +35,6 @@ namespace TFDevice { namespace { -constexpr char kDeviceAttr[] = "device"; constexpr char kFuncAttr[] = "func"; struct ClusterOutliningPass @@ -43,28 +42,29 @@ struct ClusterOutliningPass void runOnOperation() override; }; -void ReplaceLaunchReturnWithReturn(tf_device::ReturnOp launch_return_op, - OpBuilder* builder) { - builder->create(launch_return_op.getLoc(), - launch_return_op.getOperands()); - launch_return_op.erase(); +void ReplaceClusterReturnWithReturn(tf_device::ReturnOp cluster_return_op, + OpBuilder* builder) { + builder->create(cluster_return_op.getLoc(), + cluster_return_op.getOperands()); + cluster_return_op.erase(); } -// Builds a function that outlines region attached to launch_op and inserts +// Builds a function that outlines region attached to cluster_op and inserts // built function into given module. -FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, - tf_device::LaunchOp launch_op, SymbolTable* symbol_table, +FuncOp BuildFunction(llvm::ArrayRef live_ins, + tf_device::ClusterOp cluster_op, SymbolTable* symbol_table, OpBuilder* builder) { llvm::SmallVector operand_types; operand_types.reserve(live_ins.size()); for (Value v : live_ins) operand_types.emplace_back(v.getType()); - auto func_type = FunctionType::get(operand_types, launch_op.getResultTypes(), + auto func_type = FunctionType::get(operand_types, cluster_op.getResultTypes(), builder->getContext()); - std::string func_name_prefix = Twine(device, "_func").str(); + // TODO(lyandy): Define better name for outlined function. Potentially some + // name can be added during cluster formation. FuncOp outlined_func = - FuncOp::create(launch_op.getLoc(), func_name_prefix, func_type); + FuncOp::create(cluster_op.getLoc(), "_func", func_type); // This function is not externally visible and marking it private would allow // symbol-dce pass to remove it when it is not referenced anymore. @@ -73,64 +73,59 @@ FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, // Create function body. Block* outlined_func_block = outlined_func.addEntryBlock(); - // Replace uses of live-in values within launch_op region with function + // Replace uses of live-in values within cluster_op region with function // arguments. - Region& launch_op_region = launch_op.body(); - for (const auto& p : - llvm::zip(live_ins, outlined_func_block->getArguments())) { + Region& cluster_op_region = cluster_op.body(); + for (auto p : llvm::zip(live_ins, outlined_func_block->getArguments())) { replaceAllUsesInRegionWith(std::get<0>(p), std::get<1>(p), - launch_op_region); + cluster_op_region); } - // Move all instructions in launch_op into outlined_function's only block. - auto& launch_op_body = launch_op_region.front().getOperations(); + // Move all instructions in cluster_op into outlined_function's only block. + auto& cluster_op_body = cluster_op.GetBody().getOperations(); outlined_func_block->getOperations().splice( - outlined_func_block->end(), launch_op_body, launch_op_body.begin(), - launch_op_body.end()); + outlined_func_block->end(), cluster_op_body, cluster_op_body.begin(), + cluster_op_body.end()); - // Replace `tf_device.launch_return` terminator with `std.return` in function + // Replace `tf_device.return` terminator with `std.return` in function // body. 
- auto launch_return_op = + auto cluster_return_op = cast(outlined_func_block->getTerminator()); - builder->setInsertionPoint(launch_return_op); - ReplaceLaunchReturnWithReturn(launch_return_op, builder); + builder->setInsertionPoint(cluster_return_op); + ReplaceClusterReturnWithReturn(cluster_return_op, builder); symbol_table->insert(outlined_func); return outlined_func; } -// Outlines body of `tf_device.launch` into a function and create a -// `tf_device.launch_func` to invoke that function. `tf_device.launch` is +// Outlines body of `tf_device.cluster` into a function and create a +// `tf_device.cluster_func` to invoke that function. `tf_device.cluster` is // removed afterwards.` -void OutlineLaunch(tf_device::LaunchOp launch_op, SymbolTable* symbol_table, - OpBuilder* builder) { +void OutlineCluster(tf_device::ClusterOp cluster_op, SymbolTable* symbol_table, + OpBuilder* builder) { llvm::SetVector live_ins; - getUsedValuesDefinedAbove(launch_op.body(), launch_op.body(), live_ins); + getUsedValuesDefinedAbove(cluster_op.body(), cluster_op.body(), live_ins); - StringRef device = - launch_op.getAttrOfType(kDeviceAttr).getValue(); + FuncOp outlined_func = + BuildFunction(live_ins.getArrayRef(), cluster_op, symbol_table, builder); + cluster_op.setAttr(builder->getIdentifier(kFuncAttr), + builder->getSymbolRefAttr(outlined_func.getName())); - FuncOp outlined_func = BuildFunction(device, live_ins.getArrayRef(), - launch_op, symbol_table, builder); - launch_op.setAttr(builder->getIdentifier(kFuncAttr), - builder->getSymbolRefAttr(outlined_func.getName())); + builder->setInsertionPoint(cluster_op); + auto cluster_func_op = builder->create( + cluster_op.getLoc(), outlined_func.getType().getResults(), + live_ins.getArrayRef(), cluster_op.getAttrs()); - builder->setInsertionPoint(launch_op); - tf_device::LaunchFuncOp launch_func_op = - builder->create( - launch_op.getLoc(), outlined_func.getType().getResults(), - live_ins.getArrayRef(), launch_op.getAttrs()); - - launch_op.replaceAllUsesWith(launch_func_op); - launch_op.erase(); + cluster_op.replaceAllUsesWith(cluster_func_op); + cluster_op.erase(); } void ClusterOutliningPass::runOnOperation() { - ModuleOp m = getOperation(); - SymbolTable symbol_table(m); - OpBuilder builder(m.getContext()); - m.walk([&](tf_device::LaunchOp launch) { - OutlineLaunch(launch, &symbol_table, &builder); + ModuleOp module = getOperation(); + SymbolTable symbol_table(module); + OpBuilder builder(module.getContext()); + module.walk([&](tf_device::ClusterOp cluster) { + OutlineCluster(cluster, &symbol_table, &builder); }); } @@ -142,7 +137,7 @@ std::unique_ptr> CreateClusterOutliningPass() { static PassRegistration pass( "tf-device-cluster-outlining", - "Outline regions of tf_device.launch operations."); + "Outline regions of tf_device.cluster operations."); } // namespace TFDevice } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 92fa4e74a68..81d0259d2d6 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -91,6 +91,10 @@ std::unique_ptr> CreateResourceDeviceInferencePass(); // of their aliasing output arguments. std::unique_ptr> CreatePromoteResourcesToArgsPass(); +// Creates a pass that promotes tf.VarHandleOp to resource arguments for all +// functions. +std::unique_ptr> CreatePromoteVarHandlesToArgsPass(); + // Marks function visibility using tf.entry_function specification. 
That is, // functions with tf.entry_function attributes are marked with public // visibility while the other functions are marked with private visibility. @@ -256,6 +260,11 @@ std::unique_ptr> CreateTPUMergeVariablesWithExecutePass(); // run-time according to compilation result. std::unique_ptr> CreateTPUVariableReformattingPass(); +// Creates a pass that extracts outside compilation (CPU ops inside TPU cluster) +// at head/tail of TPU cluster to run before/after TPU computation. +std::unique_ptr> +CreateTPUExtractHeadTailOutsideCompilationPass(); + // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) // ops to a separate parallel_execute region to run on CPU. std::unique_ptr> CreateTPUExtractOutsideCompilationPass(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc index fa4fe461317..cece23b4750 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc @@ -47,11 +47,14 @@ limitations under the License. // . Dead functions have already been removed, as resource arguments in dead // functions can cause the pass to fail. +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project @@ -73,114 +76,189 @@ constexpr char kResourceFunctionMsg[] = "expects function level resource argument"; constexpr char kInvalidResourceMsg[] = "expects resource to be a VarHandleOp or function argument"; +constexpr char kResourceNameArgAttr[] = "tf.resource_name"; -// Records the input argument index and the current live value for a resource -// variable. -// -// . If the input argument already exists or has been added, input_index is the -// index of the function, and live_value_or_type tracks the live value of the -// resource. -// -// . If the input argument has not been added in the pass, input_index is -// kInputUnassigned, live_value_or_type represents the type of the resource. -// (a) If this resource is read, add a new argument whose type is obtained -// from live_value_or_type, and input_index and live_value_or_type will be -// updated to reference the new argument. -// (b) If this resource is written, live_value_or_type will track the new -// value of the resource. input_index will remain to be kInputUnassigned. +// Checks if a function has only one block. +mlir::LogicalResult CheckSingleBlockFunction(FuncOp function) { + if (!hasSingleElement(function.getBlocks())) + return function.emitError() + << "expects function '" << function.getName() + << "' to have 1 block, got " << function.getBlocks().size(); + + return success(); +} + +// Collects names of users of a resource that are not `tf.ReadVariableOp` and +// not `tf.AssignVariableOp`. +llvm::SmallSet GetCompositeResourceUserNames( + Value resource) { + // SmallSet will use a vector when there is only one element and use std::set + // when there are more than one elements. This ensures that the operations in + // the error message are ordered. 
+ llvm::SmallSet composite_users; + for (Operation* user : resource.getUsers()) + if (!llvm::isa(user) && + !llvm::isa(user)) + composite_users.insert(user->getName().getStringRef()); + + return composite_users; +} + +// Checks if `tf.VarHandleOp` has a valid resource subtype and its users are of +// `tf.ReadVariableOp` and `tf.AssignVariableOp` only. +mlir::LogicalResult ValidateVarHandle(TF::VarHandleOp var_handle_op) { + auto resource_type = + getElementTypeOrSelf(var_handle_op.getType()).cast(); + if (resource_type.getSubtypes().size() != 1) + return var_handle_op.emitOpError() + << "expects resource type to have one subtype, got " + << resource_type; + + auto composite_ops = GetCompositeResourceUserNames(var_handle_op); + if (!composite_ops.empty()) + return var_handle_op.emitOpError() + << "expects users to be 'tf.ReadVariableOp' or " + "'tf.AssignVariableOp', got [" + << llvm::join(composite_ops.begin(), composite_ops.end(), ", ") + << "]"; + + return success(); +} + +// Checks if resource argument has a valid resource subtype and its users are of +// `tf.ReadVariableOp` and `tf.AssignVariableOp` only. +mlir::LogicalResult ValidateResourceArgument(FuncOp function, + BlockArgument resource_arg, + TF::ResourceType resource_type) { + if (resource_type.getSubtypes().size() != 1) + return function.emitError() + << "expects resource type of argument " + << resource_arg.getArgNumber() << " to have one subtype, got " + << resource_type; + + auto composite_ops = GetCompositeResourceUserNames(resource_arg); + if (!composite_ops.empty()) + return function.emitError() + << "expects users of resource argument " + << resource_arg.getArgNumber() + << " to be 'tf.ReadVariableOp' or 'tf.AssignVariableOp', got [" + << llvm::join(composite_ops.begin(), composite_ops.end(), ", ") + << "]"; + + return success(); +} + +// Adds resource arguments for every unique (name) variable handle. Associated +// `tf.VarHandleOp` are removed from the function. Variable shared names are +// returned in `var_handle_shared_names` based on the ordering of added resource +// arguments. +mlir::LogicalResult PromoteVarHandlesToArguments( + FuncOp function, bool add_validation, + llvm::SmallVectorImpl* var_handle_shared_names) { + Block& block = function.front(); + auto func_type = function.getType(); + + auto func_arg_types = llvm::to_vector<4>(func_type.getInputs()); + llvm::SmallDenseMap var_arg_index_by_name; + for (auto var_handle_op : + llvm::make_early_inc_range(block.getOps())) { + if (add_validation && failed(ValidateVarHandle(var_handle_op))) + return failure(); + + llvm::StringRef name = var_handle_op.shared_nameAttr().getValue(); + auto it = var_arg_index_by_name.insert({name, func_arg_types.size()}); + if (it.second) { + var_handle_shared_names->emplace_back(name); + auto resource_type = var_handle_op.resource().getType(); + func_arg_types.push_back(resource_type); + var_handle_op.resource().replaceAllUsesWith( + block.addArgument(resource_type)); + } else { + var_handle_op.resource().replaceAllUsesWith( + block.getArgument(it.first->getSecond())); + } + var_handle_op.erase(); + } + + if (!var_handle_shared_names->empty()) + function.setType(FunctionType::get(func_arg_types, func_type.getResults(), + function.getContext())); + + return success(); +} + +// Records the current live value for a resource variable and whether a read or +// write on the variable occurred. 
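The rewrite this bookkeeping supports boils down to store-load forwarding over a single block: each resource starts with its promoted argument as the live value, every read is replaced by the current live value, and every write simply updates the live value and marks the resource as written; only written resources get their final value appended to the function's return. The sketch below models that walk over a toy event list; the `Event` type and all values are illustrative, not part of the pass.

    // Toy model of the live-value walk used when promoting resources to
    // arguments (illustrative only, not the MLIR pass). Events are the reads
    // and writes of a single resource in block order.
    #include <iostream>
    #include <string>
    #include <vector>

    struct Event {
      std::string kind;   // "read" or "write"
      std::string value;  // value written (for writes)
    };

    int main() {
      // Initially the promoted argument itself is the live value.
      std::string live_value = "%arg0";
      bool read = false, write = false;

      std::vector<Event> events = {
          {"read", ""}, {"write", "%new0"}, {"read", ""}, {"write", "%new1"}};

      for (const Event& e : events) {
        if (e.kind == "read") {
          // A tf.ReadVariableOp is replaced by the current live value.
          std::cout << "read forwarded to " << live_value << "\n";
          read = true;
        } else {
          // A tf.AssignVariableOp only updates the live value.
          live_value = e.value;
          write = true;
        }
      }

      // A written resource has its final live value appended to the return; one
      // that was also read (or was an original resource argument) is recorded as
      // an input/output alias, while a write-only tf.VarHandleOp resource only
      // receives a tf.resource_name result attribute.
      if (write) std::cout << "return also yields " << live_value << "\n";
    }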
struct ResourceInfo { - static constexpr int64_t kInputUnassigned = -1; - int64_t input_index; - llvm::PointerUnion live_value_or_type; + Value live_value = nullptr; + bool read = false; + bool write = false; }; -using ArgOrName = llvm::PointerUnion; -using ResourceMap = llvm::SmallDenseMap; - -LogicalResult PromoteResourcesToArguments(FuncOp function) { +LogicalResult PromoteResourcesToArguments( + FuncOp function, llvm::ArrayRef var_handle_shared_names) { Block& block = function.front(); auto return_op = llvm::dyn_cast_or_null(block.getTerminator()); if (!return_op) - return function.emitError( - "expects 'main' function to have a MLIR ReturnOp"); + return function.emitError() << "expects function '" << function.getName() + << "' to have a MLIR ReturnOp"; - ResourceMap resource_map; + llvm::SmallVector resources(function.getNumArguments()); auto argument_types = llvm::to_vector<4>(function.getType().getInputs()); + bool has_resources = false; + auto add_resource_argument = [&](BlockArgument arg, + TF::ResourceType resource_type) { + Type arg_type = resource_type.getSubtypes().front(); + arg.setType(arg_type); + resources[arg.getArgNumber()].live_value = arg; + argument_types[arg.getArgNumber()] = arg_type; + has_resources = true; + }; - // Loop through the resource arguments in the function and store a mapping - // from that argument to its index and itself as the current live value. - for (BlockArgument& func_arg : function.getArguments()) { + // Loop through the non `tf.VarHandleOp` resource arguments in the function, + // validate its uses and subtype, and store a mapping from that argument to + // itself as the current live value. + auto func_args = function.getArguments().take_front( + function.getNumArguments() - var_handle_shared_names.size()); + for (BlockArgument& func_arg : func_args) { auto resource_type = getElementTypeOrSelf(func_arg.getType()).dyn_cast(); if (!resource_type) continue; - if (resource_type.getSubtypes().size() != 1) - return function.emitError() - << "expects resource type of argument " << func_arg.getArgNumber() - << " to have one subtype, got " << resource_type; + if (failed(ValidateResourceArgument(function, func_arg, resource_type))) + return failure(); - for (auto* user : func_arg.getUsers()) - if (!llvm::isa(user) && - !llvm::isa(user)) - return function.emitError() - << "expects users of resource argument " - << func_arg.getArgNumber() - << " to be 'tf.ReadVariableOp' or 'tf.AssignVariableOp'"; - - Type arg_type = resource_type.getSubtypes().front(); - func_arg.setType(arg_type); - resource_map[func_arg] = {func_arg.getArgNumber(), func_arg}; - argument_types[func_arg.getArgNumber()] = arg_type; + add_resource_argument(func_arg, resource_type); } - // Loop through the VarHandleOp in the function. When the first VarHandleOp - // for a resource variable is encountered, add an entry to the resource_map to - // record the information. Do not add a new function argument yet. - for (auto var_handle_op : block.getOps()) { - if (resource_map.count(var_handle_op.shared_nameAttr())) continue; - + // Loop through `tf.VarHandleOp` resource arguments in the function and store + // a mapping from that argument to itself as the current live value. No + // validations are necessary here as these arguments were validated prior to + // being added. 
+ auto var_handle_args = + function.getArguments().take_back(var_handle_shared_names.size()); + for (BlockArgument& var_handle_arg : var_handle_args) { auto resource_type = - getElementTypeOrSelf(var_handle_op.getType()).cast(); - if (resource_type.getSubtypes().size() != 1) - return var_handle_op.emitOpError() - << "expects resource type to have one subtype, got " - << resource_type; - - resource_map[var_handle_op.shared_nameAttr()] = { - ResourceInfo::kInputUnassigned, resource_type.getSubtypes().front()}; + getElementTypeOrSelf(var_handle_arg.getType()).cast(); + add_resource_argument(var_handle_arg, resource_type); } - if (resource_map.empty()) return success(); + if (!has_resources) return success(); // We initially assign the argument for a resource as the live value for the // resource. We then walk through the operations in the function in their // lexical order, to update the live value for the resource when we see a // store to the resource and replace reads of the resource with uses of its - // live value. For the reads, if the resource does not have a live value yet, - // we add a new argument and use it as the live value. + // live value. for (Operation& op : llvm::make_early_inc_range(block)) { if (auto read_op = llvm::dyn_cast(&op)) { if (auto func_arg = read_op.resource().dyn_cast()) { if (func_arg.getOwner() != &block) return read_op.emitOpError(kResourceFunctionMsg); - // resource_map[func_arg] is always a Value when func_arg is a - // BlockArgument. - read_op.value().replaceAllUsesWith( - resource_map[func_arg].live_value_or_type.get()); - } else if (auto var_handle_op = llvm::dyn_cast( - read_op.resource().getDefiningOp())) { - ResourceInfo& info = resource_map[var_handle_op.shared_nameAttr()]; - if (auto live_value = info.live_value_or_type.dyn_cast()) { - read_op.value().replaceAllUsesWith(live_value); - } else { - auto arg_type = info.live_value_or_type.get(); - BlockArgument arg = block.addArgument(arg_type); - info.input_index = argument_types.size(); - info.live_value_or_type = arg; - argument_types.push_back(arg_type); - read_op.value().replaceAllUsesWith(arg); - } + ResourceInfo& resource_info = resources[func_arg.getArgNumber()]; + resource_info.read = true; + read_op.value().replaceAllUsesWith(resource_info.live_value); } else { return read_op.emitOpError(kInvalidResourceMsg); } @@ -191,11 +269,9 @@ LogicalResult PromoteResourcesToArguments(FuncOp function) { if (func_arg.getOwner() != &block) return write_op.emitOpError(kResourceFunctionMsg); - resource_map[func_arg].live_value_or_type = write_op.value(); - } else if (auto var_handle_op = llvm::dyn_cast( - write_op.resource().getDefiningOp())) { - resource_map[var_handle_op.shared_nameAttr()].live_value_or_type = - write_op.value(); + ResourceInfo& resource_info = resources[func_arg.getArgNumber()]; + resource_info.write = true; + resource_info.live_value = write_op.value(); } else { return read_op.emitOpError(kInvalidResourceMsg); } @@ -206,67 +282,68 @@ LogicalResult PromoteResourcesToArguments(FuncOp function) { const int64_t num_results_before = function.getNumResults(); auto return_operands = llvm::to_vector<4>(return_op.getOperands()); - return_operands.reserve(num_results_before + resource_map.size()); auto result_types = llvm::to_vector<4>(return_op.getOperandTypes()); - result_types.reserve(num_results_before + resource_map.size()); - llvm::SmallVector, 4> output_only_resources; - output_only_resources.reserve(resource_map.size()); + llvm::SmallVector, 4> + output_only_resources; llvm::SmallVector, 
4> input_output_alias; - input_output_alias.reserve(resource_map.size()); - // Collect new return values and either (a) output-only resource attributes - // (if the resource is not promoted to an argument) or (b) mapping from - // resource input index to output alias (if the resource has been promoted to - // an argument). If the last live value is itself (argument), then that live - // value will not be returned as the resource is unmodified. - for (auto& resource : resource_map) { - int64_t input_index = resource.getSecond().input_index; - auto live_value = resource.getSecond().live_value_or_type.dyn_cast(); - if (input_index == ResourceInfo::kInputUnassigned) { - if (!live_value) continue; - - output_only_resources.push_back( - {return_operands.size(), resource.getFirst().dyn_cast()}); - } else { - // live_value is not nullptr because any input-assigned resource has a - // Value as live_value. - auto live_arg = live_value.dyn_cast(); - if (live_arg && live_arg.getOwner() == &block && - live_arg.getArgNumber() == input_index) - continue; - - input_output_alias.push_back({input_index, return_operands.size()}); - } - return_operands.push_back(live_value); - result_types.push_back(live_value.getType()); - } - - // Erase all VarHandleOp. - for (Operation& op : llvm::make_early_inc_range(function.front())) { - auto var_handle_op = llvm::dyn_cast(op); - if (!var_handle_op) continue; - if (!var_handle_op.use_empty()) { - // SmallSet will use a vector when there is only one element and use - // std::set when there are more than one elements. This ensures that - // the operations in the error message are ordered. - llvm::SmallSet unique_operations; - llvm::for_each( - var_handle_op.getOperation()->getUsers(), [&](Operation* user) { - unique_operations.insert(user->getName().getStringRef().str()); - }); - - return var_handle_op.emitOpError( - "expects no uses but used by operations: ") - << llvm::join(unique_operations.begin(), unique_operations.end(), - ", "); - } - - op.erase(); - } - - // Rewrite return if more results need to be returned by the function. + // Collect new return values for variable writes and either (a) output-only + // resource attributes (if the resource is not promoted to an argument) or (b) + // mapping from resource input index to output alias (if the resource has been + // promoted to an argument). Resource arguments that were originally + // `tf.VarHandleOp` but not read are collected and then removed. OpBuilder builder(return_op); - if (!output_only_resources.empty() || !input_output_alias.empty()) { + const int var_handles_start_idx = + function.getNumArguments() - var_handle_shared_names.size(); + int new_argument_index = 0; + llvm::SmallVector argument_indices_to_remove; + for (auto resource_and_index : llvm::enumerate(resources)) { + const auto& resource = resource_and_index.value(); + if (!resource.live_value) { + // Ignore non resource arguments. 
+ ++new_argument_index; + continue; + } + + const auto index = resource_and_index.index(); + const bool is_var_handle = index >= var_handles_start_idx; + if (resource.write) { + if (!is_var_handle || resource.read) { + input_output_alias.push_back( + {new_argument_index, return_operands.size()}); + } else if (is_var_handle) { + output_only_resources.push_back( + {return_operands.size(), + var_handle_shared_names[index - var_handles_start_idx]}); + } + return_operands.push_back(resource.live_value); + result_types.push_back(resource.live_value.getType()); + } + + if (is_var_handle && !resource.read) { + assert(block.getArgument(index).getUses().empty()); + argument_indices_to_remove.push_back(index); + } else { + if (is_var_handle) { + // Add resource_name attribute to VarHandleOp read. + function.setArgAttr( + new_argument_index, kResourceNameArgAttr, + builder.getStringAttr( + var_handle_shared_names[index - var_handles_start_idx])); + } + ++new_argument_index; + } + } + + // Remove unread var handle arguments. + for (int argument_index_to_remove : + llvm::reverse(argument_indices_to_remove)) { + block.eraseArgument(argument_index_to_remove); + argument_types.erase(argument_types.begin() + argument_index_to_remove); + } + + // Rewrite return if there are variable writes. + if (return_operands.size() > num_results_before) { builder.create(return_op.getLoc(), return_operands); return_op.erase(); } @@ -274,17 +351,10 @@ LogicalResult PromoteResourcesToArguments(FuncOp function) { // Update function argument and result types with new resource subtypes. function.setType(builder.getFunctionType(argument_types, result_types)); - // Add resource_name attribute to the input argument for the resources. - for (auto& resource : resource_map) { - if (auto attr = resource.getFirst().dyn_cast()) { - int64_t input_index = resource.getSecond().input_index; - if (input_index != ResourceInfo::kInputUnassigned) - function.setArgAttr(input_index, "tf.resource_name", attr); - } - } // Add resource_name attribute to the output for the resources. for (auto& resource : output_only_resources) - function.setResultAttr(resource.first, "tf.resource_name", resource.second); + function.setResultAttr(resource.first, kResourceNameArgAttr, + builder.getStringAttr(resource.second)); // Add aliasing_output attribute to the input argument for the resources that // are updated by the function. @@ -309,26 +379,60 @@ void PromoteResourcesToArgsPass::runOnOperation() { // This routine should only be called when control flow operations are still // represented with TF IfOp and WhileOp operations. In this case, there should // be only one basic blocks in the MLIR representation. 
- if (!hasSingleElement(main_func.getBlocks())) { - main_func.emitError() << "expects 'main' function to have 1 block, got " - << main_func.getBlocks().size(); - return signalPassFailure(); - } + if (failed(CheckSingleBlockFunction(main_func))) return signalPassFailure(); + llvm::SmallVector var_handle_shared_names; if (failed(ResourceLiftingForFunctionalControlFlow(main_func)) || - failed(PromoteResourcesToArguments(main_func))) + failed(PromoteVarHandlesToArguments(main_func, /*add_validation=*/true, + &var_handle_shared_names)) || + failed(PromoteResourcesToArguments(main_func, var_handle_shared_names))) return signalPassFailure(); } +class PromoteVarHandlesToArgsPass + : public PassWrapper> { + public: + void runOnOperation() override; +}; + +void PromoteVarHandlesToArgsPass::runOnOperation() { + ModuleOp module = getOperation(); + MLIRContext* context = module.getContext(); + for (auto function : module.getOps()) { + if (failed(CheckSingleBlockFunction(function))) return signalPassFailure(); + + llvm::SmallVector var_handle_shared_names; + PromoteVarHandlesToArguments(function, /*add_validation=*/false, + &var_handle_shared_names); + + // Add resource names for each `tf.VarHandleOp` that were promoted to + // resource arguments. + const int var_handle_args_offset = + function.getNumArguments() - var_handle_shared_names.size(); + for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) + function.setArgAttr(var_name_and_index.index() + var_handle_args_offset, + kResourceNameArgAttr, + StringAttr::get(var_name_and_index.value(), context)); + } +} + } // namespace std::unique_ptr> CreatePromoteResourcesToArgsPass() { return std::make_unique(); } +std::unique_ptr> CreatePromoteVarHandlesToArgsPass() { + return std::make_unique(); +} + static PassRegistration pass( "tf-promote-resources-to-args", "Promote resources reads/writes to function inputs/outputs."); +static PassRegistration var_handle_pass( + "tf-promote-var-handles-to-args", + "Promote tf.VarHandleOps to function arguments."); + } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc index 30bc1a21075..2fd230005d0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" @@ -107,10 +108,9 @@ llvm::SmallVector ExpandReplicateIntoReplicas( // Creates islands per replica from `tf_device.replicate` region and remap // replicate results with new island outputs. A single island is created to -// forward results from each replica island. Control dependencies of individual -// replicas are added to the single island if the single island does not emit -// a result from the respective replica. Devices are remapped from aliased -// devices to explicit devices, for `tf_device.launch` ops. +// forward control dependencies if there is a control dependency output from the +// replicate island. Devices are remapped from aliased devices to explicit +// devices, for `tf_device.launch` ops. 
// // For example, the following: // @@ -156,12 +156,9 @@ llvm::SmallVector ExpandReplicateIntoReplicas( // }) {device = "/DEVICE:3"} : () -> tensor // tf_executor.yield %a1, %b1 : tensor, tensor // } -// %6:2 = tf_executor.island(%3#2) { -// tf_executor.yield %0#0 : tensor -// } -LogicalResult CreateIslandsFromReplicate(const Dialect* tf_dialect, - tf_executor::IslandOp island_op, - tf_device::ReplicateOp replicate_op) { +void CreateIslandsFromReplicate(const Dialect* tf_dialect, + tf_executor::IslandOp island_op, + tf_device::ReplicateOp replicate_op) { OpBuilder builder(island_op); const int num_replicas = replicate_op.n().getLimitedValue(); @@ -181,45 +178,38 @@ LogicalResult CreateIslandsFromReplicate(const Dialect* tf_dialect, replica_result_and_idx.value(); // Remap replicate results to per replica result. - replicate_op.replaceAllUsesWith(replicas_outputs); + for (auto result : llvm::zip(island_op.outputs(), replicas_outputs)) + std::get<0>(result).replaceAllUsesWith(std::get<1>(result)); - // Collect per replica control dependency and add to island operand if replica - // island has no uses. - llvm::SmallVector island_operands; - for (auto& replica : replicas) - if (replica.use_empty()) island_operands.push_back(replica.control()); + // Add sink island to pin all replicas as a control dependency if there is a + // control dependency leading from the replicate originally. + if (!island_op.control().use_empty()) { + llvm::SmallVector island_operands; + for (auto& replica : replicas) island_operands.push_back(replica.control()); - // Create single island forwarding per replica result. - builder.setInsertionPoint(island_op); - auto island_sink = builder.create( - island_op.getLoc(), - llvm::to_vector<8>(island_op.GetYield().fetches().getTypes()), - tf_executor::ControlType::get(island_op.getContext()), island_operands); - island_sink.body().push_back(new Block); - - // Move replicate island YieldOp over to new single island. - island_op.GetYield().getOperation()->moveBefore( - &island_sink.GetBody(), island_sink.GetBody().begin()); - - // Remap island results. - island_op.replaceAllUsesWith(island_sink); + builder.setInsertionPoint(island_op); + auto island_sink = builder.create( + island_op.getLoc(), llvm::ArrayRef{}, + tf_executor::ControlType::get(island_op.getContext()), island_operands); + island_sink.body().push_back(new Block); + builder.setInsertionPointToEnd(&island_sink.GetBody()); + builder.create(island_op.getLoc(), + llvm::ArrayRef{}); + island_op.control().replaceAllUsesWith(island_sink.control()); + } island_op.erase(); - return success(); } // Finds islands with a single `tf_device.replicate` and create individual // islands per replica of the replicate. 
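The expansion sketched below is essentially a copy-per-replica of the wrapped body: replica i receives element i of each replicated operand, aliased device names are resolved to that replica's concrete device, and a control-only sink island is added only when the original island's control result had uses. This is a minimal standalone model with made-up operand and device names, not the MLIR pass.

    // Toy sketch of expanding a replicate into one island per replica
    // (illustrative only). Each replicated input supplies one value per
    // replica, and an aliased device is resolved to that replica's device.
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      const int num_replicas = 2;
      // Replicated input: the block argument gets one value per replica.
      std::vector<std::string> replicated_input = {"%a0", "%a1"};
      // Aliased device resolved per replica, as in the devices attribute.
      std::map<std::string, std::vector<std::string>> devices = {
          {"CORE_0", {"/DEVICE:0", "/DEVICE:1"}}};

      for (int replica = 0; replica < num_replicas; ++replica) {
        std::cout << "%island_" << replica << " = tf_executor.island {\n"
                  << "  %r = \"tf.OpA\"(" << replicated_input[replica] << ")"
                  << " {device = \"" << devices["CORE_0"][replica] << "\"}\n"
                  << "  tf_executor.yield %r\n"
                  << "}\n";
      }
      // If the original island's control result had uses, a control-only sink
      // island depending on every replica island would be added as well.
    }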
-LogicalResult LowerSingleIslandReplicateToIslands( - const Dialect* tf_dialect, tf_executor::IslandOp island_op) { - if (!hasSingleElement(island_op.GetBody().without_terminator())) - return success(); +void LowerSingleIslandReplicateToIslands(const Dialect* tf_dialect, + tf_executor::IslandOp island_op) { + if (!island_op.WrapsSingleOp()) return; if (auto replicate_op = llvm::dyn_cast(&island_op.GetBody().front())) - return CreateIslandsFromReplicate(tf_dialect, island_op, replicate_op); - - return success(); + CreateIslandsFromReplicate(tf_dialect, island_op, replicate_op); } void ReplicateToIslandPass::runOnFunction() { @@ -229,13 +219,9 @@ void ReplicateToIslandPass::runOnFunction() { getFunction().emitError() << "'tf' dialect is not registered"; } - auto result = getFunction().walk([&](tf_executor::IslandOp island_op) { - if (failed(LowerSingleIslandReplicateToIslands(tf_dialect, island_op))) - return WalkResult::interrupt(); - return WalkResult::advance(); + getFunction().walk([&](tf_executor::IslandOp island_op) { + LowerSingleIslandReplicateToIslands(tf_dialect, island_op); }); - - if (result.wasInterrupted()) return signalPassFailure(); } } // anonymous namespace diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index faacaad4c98..611c4d2725a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -62,7 +62,7 @@ namespace { // TensorFlow resource variable and returns new value: // // %resource_handle = "tf.VarHandleOp"() -// %1 = "tf_device.launch"() ( { +// %1 = "tf_device.cluster"() ( { // %init_value = "tf.ReadVariableOp"(%resource_handle) // "tf.AssignAddVariableOp"(%resource_handle, %init_value) // %new_value = "tf.ReadVariableOp"(%resource_handle) @@ -73,7 +73,7 @@ namespace { // // %resource_handle = "tf.VarHandleOp"() // %init_value = "tf.ReadVariableOp"(%resource_handle) -// %1:2 = "tf_device.launch"() ( { +// %1:2 = "tf_device.cluster"() ( { // %new_value = "tf.AddV2"(%init_value, %init_value) // tf_device.return %new_value, %new_value // }) @@ -81,7 +81,7 @@ namespace { // // You can see that there are a few main changes applied: // 1) All the resource variable reads and writes are now outside of -// tf_device.launch op. +// tf_device.cluster op. // 2) Instead of taking resource handles as input, this device computation now // takes snapshotted values of that device. // 3) Some resource load operations are eliminated with store-load forwarding. @@ -89,13 +89,13 @@ namespace { // external resource store operations so that resources are still updated // after the computation. // -// If the launch body contains functional control flow, the pass first lifts the -// loads/stores in the body/cond/branch functions to the launch body, then +// If the cluster body contains functional control flow, the pass first lifts +// the loads/stores in the body/cond/branch functions to the cluster body, then // performs the above lifting. E.g., // -// func @launch_with_loop() -> () { +// func @cluster_with_loop() -> () { // %0 = "tf.VarHandleOp"() ... -// "tf_device.launch"() ( { +// "tf_device.cluster"() ( { // %1 = "tf.While"(%0) {body = @while_body, cond = @while_cond} // tf_device.return // }) @@ -113,10 +113,10 @@ namespace { // // will be be transformed to: // -// func @launch_with_loop() { +// func @cluster_with_loop() { // %0 = "tf.VarHandleOp"() ... 
// %1 = "tf.ReadVariableOp"(%0) -// %2 = "tf_device.launch"() ( { +// %2 = "tf_device.cluster"() ( { // %3 = "tf.While"(%1) {body = @while_body, cond = @while_cond} // tf_device.return %3 : tensor // }) : () -> tensor @@ -140,7 +140,7 @@ struct ResourceOpLiftingPass // such nodes to carry information. void RemoveIdentity(Block* block) { for (auto& op : llvm::make_early_inc_range(*block)) { - if (llvm::isa(&op) || llvm::isa(&op)) { + if (isa(&op) || isa(&op)) { op.replaceAllUsesWith(op.getOperands()); op.erase(); } @@ -241,7 +241,7 @@ bool AppendResourceStoreValueToReturn(Block* body) { // TODO(ycao): Prevent same value from being returned multiple times. // TODO(ycao): Do not return resource store value if it is defined outside - // of launch_op. + // of cluster. new_return_operands.push_back(assign_variable_op.value()); has_resource_store = true; } @@ -256,81 +256,78 @@ bool AppendResourceStoreValueToReturn(Block* body) { return true; } -// Moves resource store operations to after launch_op. This assumes load-store -// forwarding has been performed on this launch_op such that there is at most -// one resource store operation carrying its final value. -tf_device::LaunchOp SinkResourceStores(tf_device::LaunchOp launch_op, - OpBuilder* builder) { - // Update ReturnOp inside launch_op's body to output final values of updated +// Moves resource store operations to after cluster. This assumes load-store +// forwarding has been performed on this cluster such that there is at most one +// resource store operation carrying its final value. +tf_device::ClusterOp SinkResourceStores(tf_device::ClusterOp cluster, + OpBuilder* builder) { + // Update ReturnOp inside cluster's body to output final values of updated // external resources. - if (!AppendResourceStoreValueToReturn(&launch_op.GetBody())) return launch_op; + if (!AppendResourceStoreValueToReturn(&cluster.GetBody())) return cluster; - auto new_return_op = launch_op.GetBody().getTerminator(); - llvm::SmallVector new_launch_return_types( - new_return_op->getOperandTypes()); + auto new_return_op = cluster.GetBody().getTerminator(); + llvm::SmallVector new_return_types(new_return_op->getOperandTypes()); - builder->setInsertionPoint(launch_op); - auto new_launch_op = builder->create( - launch_op.getLoc(), new_launch_return_types, - /*operands=*/llvm::SmallVector(), launch_op.getAttrs()); - new_launch_op.body().takeBody(launch_op.body()); + builder->setInsertionPoint(cluster); + auto new_cluster = builder->create( + cluster.getLoc(), new_return_types, + /*operands=*/llvm::SmallVector(), cluster.getAttrs()); + new_cluster.body().takeBody(cluster.body()); - // Replace uses of old launch_op results with those of new_launch_op. - for (auto p : llvm::zip(launch_op.getResults(), new_launch_op.getResults())) { - std::get<0>(p).replaceAllUsesWith(std::get<1>(p)); - } + // Replace uses of old cluster results with those of new_cluster. + for (auto result : llvm::zip(cluster.getResults(), new_cluster.getResults())) + std::get<0>(result).replaceAllUsesWith(std::get<1>(result)); - // Create a mapping from operands of new_return_op operands to new_launch_op + // Create a mapping from operands of new_return_op operands to new_cluster // results. 
BlockAndValueMapping mapper; - for (auto p : - llvm::zip(new_return_op->getOperands(), new_launch_op.getResults())) { - mapper.map(std::get<0>(p), std::get<1>(p)); - } + for (auto operand_result : + llvm::zip(new_return_op->getOperands(), new_cluster.getResults())) + mapper.map(std::get<0>(operand_result), std::get<1>(operand_result)); // Clone all resource store ops and map their operands to values returned from - // new_launch_op. - for (Operation& op : llvm::make_early_inc_range(new_launch_op.GetBody())) { - if (dyn_cast(&op)) { + // new_cluster. + for (Operation& op : llvm::make_early_inc_range(new_cluster.GetBody())) { + if (isa(op)) { builder->clone(op, mapper); op.erase(); } } - launch_op.erase(); - return new_launch_op; + cluster.erase(); + return new_cluster; } -// Hoists resource variable loads and sinks stores from launch_op. -LogicalResult HoistResourceOpsFromLaunchOp(tf_device::LaunchOp launch_op) { - ModuleOp m = launch_op.getParentOfType(); - OpBuilder builder(m); +// Hoists resource variable loads and sinks stores from cluster. +LogicalResult HoistResourceOpsFromCluster(tf_device::ClusterOp cluster, + ModuleOp module) { + OpBuilder builder(module); // Remove identity nodes to avoid aliasing. - RemoveIdentity(&launch_op.GetBody()); + RemoveIdentity(&cluster.GetBody()); // Perform store-load forwarding. So that each resource is only loaded with // its initial value and is only stored with its final value. - ForwardStoreToLoad(&launch_op.GetBody()); + ForwardStoreToLoad(&cluster.GetBody()); - // Move loads of external resources, if any, to before launch_op. - // (Skipping resources created inside of launch_op.) + // Move loads of external resources, if any, to before cluster. + // (Skipping resources created inside of cluster.) HoistResourceLoads( - &launch_op.GetBody(), + &cluster.GetBody(), /*skip_load=*/ [&](TF::ReadVariableOp read) { - return read.resource().getParentRegion() == &launch_op.body(); + return read.resource().getParentRegion() == &cluster.body(); }, /*move_load=*/ [&](TF::ReadVariableOp read) { - read.getOperation()->moveBefore(launch_op); + read.getOperation()->moveBefore(cluster); }); - // Move stores of external resources, if any, to after launch_op. - auto new_launch_op = SinkResourceStores(launch_op, &builder); + // Move stores of external resources, if any, to after cluster. + auto new_cluster = SinkResourceStores(cluster, &builder); llvm::SetVector captured_values; - getUsedValuesDefinedAbove(new_launch_op.body(), new_launch_op.body(), + getUsedValuesDefinedAbove(new_cluster.body(), new_cluster.body(), captured_values); for (Value v : captured_values) { @@ -338,7 +335,7 @@ LogicalResult HoistResourceOpsFromLaunchOp(tf_device::LaunchOp launch_op) { if (!tensor_type) continue; if (!tensor_type.getElementType().isa()) continue; - return new_launch_op.emitOpError() + return new_cluster.emitOpError() << "has remaining resource inputs that can not be lifted"; } @@ -378,8 +375,7 @@ LogicalResult FindResourceArgUseInfo( info.data_type = assign.value().getType(); continue; } - if (llvm::isa(user) || - llvm::isa(user)) { + if (isa(user) || isa(user)) { // Stacks will be handled by a separate pass. 
do_not_touch = true; break; @@ -1034,7 +1030,7 @@ LogicalResult HoistForFunctionalControlFlow( for (auto local_var : local_vars) { if (llvm::all_of(local_var.resource().getUsers(), [](const Operation* user) { - return llvm::isa(user); + return isa(user); })) { for (auto user : local_var.resource().getUsers()) user->erase(); local_var.erase(); @@ -1043,18 +1039,18 @@ LogicalResult HoistForFunctionalControlFlow( return success(); } -// Lifts resource operation from tf_device.launch_func ops nested in `op` -// outside. Returns failure if there are remaining resource-type values that can -// not be lifted. +// Lifts resource operation from tf_device.cluster ops nested in `op` outside. +// Returns failure if there are remaining resource-type values that can not be +// lifted. void ResourceOpLiftingPass::runOnOperation() { llvm::SmallDenseMap lifted_partitioned_call_callees; - auto result = getOperation().walk([&](FuncOp func_op) { - return func_op.walk([&](tf_device::LaunchOp launch_op) { + ModuleOp module = getOperation(); + auto result = module.walk([&](FuncOp func_op) { + return func_op.walk([&](tf_device::ClusterOp cluster) { if (failed(HoistForFunctionalControlFlow( - &launch_op.GetBody(), getOperation(), - &lifted_partitioned_call_callees)) || - failed(HoistResourceOpsFromLaunchOp(launch_op))) { + &cluster.GetBody(), module, &lifted_partitioned_call_callees)) || + failed(HoistResourceOpsFromCluster(cluster, module))) { return WalkResult::interrupt(); } return WalkResult::advance(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 38a1464ffcc..5a2cae38062 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -19,6 +19,8 @@ limitations under the License. #include #include +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" @@ -26,6 +28,7 @@ limitations under the License. #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project @@ -56,12 +59,14 @@ limitations under the License. #define DEBUG_TYPE "tf-shape-inference" using ::tensorflow::int64; +using tensorflow::shape_inference::DimensionHandle; +using tensorflow::shape_inference::InferenceContext; +using tensorflow::shape_inference::ShapeHandle; namespace mlir { namespace TF { namespace { -Optional> InferShapeForFunctionReturnType( - FuncOp func) { +Optional> InferShapeForFunctionReturnType(FuncOp func) { // Find any return ops. SmallVector return_ops; for (Block& block : func) { @@ -121,19 +126,19 @@ bool IsSupportedNonTFOp(Operation* op) { // not a TF operation, as we can't guarantee that the new type will be OK. void AddCastBackForUnsupportedNonTFUses(Operation* op, Value result, Dialect* tf_dialect, Type old_type) { - OpBuilder builder(op); - builder.setInsertionPointAfter(op); // A tf.Cast operation is lazily created on the first uses that isn't a TF // operation. 
TF::CastOp cast_op; auto get_cast_op = [&]() { - if (!cast_op) - cast_op = - builder.create(op->getLoc(), old_type, result, - /*truncate=*/builder.getBoolAttr(false)); - return mlir::Value(cast_op); + if (!cast_op) { + OpBuilder b(op); + b.setInsertionPointAfter(op); + cast_op = b.create(op->getLoc(), old_type, result, + /*truncate=*/b.getBoolAttr(false)); + } + return Value(cast_op); }; - for (OpOperand& use : llvm::make_early_inc_range(result.getUses())) { + for (OpOperand& use : make_early_inc_range(result.getUses())) { if (use.getOwner()->getDialect() != tf_dialect && !IsSupportedNonTFOp(use.getOwner())) use.set(get_cast_op()); @@ -156,10 +161,22 @@ Optional GetShapeFromMlirType(Type t) { bool InferShapeForPassThroughOps(OperandRange pass_through_operands, Operation* op, Dialect* tf_dialect) { bool changed = false; - for (auto entry : llvm::zip(pass_through_operands, op->getResults())) { + for (auto entry : zip(pass_through_operands, op->getResults())) { Type operand_type = std::get<0>(entry).getType(); Value result = std::get<1>(entry); if (result.getType() == operand_type) continue; + // Pass through nodes may remove ref types, don't consider that as + // refinement. + // TODO(jpienaar): There could be refinement in addition to this, so + // refine this. + if (operand_type.cast() + .getElementType() + .isa() && + !result.getType() + .cast() + .getElementType() + .isa()) + continue; AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect, result.getType()); result.setType(operand_type); @@ -186,7 +203,7 @@ bool InferShapeForNonTFDialectOperation(Operation* op, Dialect* tf_dialect) { tf_dialect); } // TODO(b/155227679): Use OpInterface instead of hard-coding for TensorCastOp. - if (auto tensor_cast = dyn_cast(op)) { + if (auto tensor_cast = dyn_cast(op)) { return InferShapeForPassThroughOps( tensor_cast.getOperation()->getOperands(), op, tf_dialect); } @@ -236,9 +253,22 @@ GetSubtypes(Type type) { // match the i-th operand type). Returns true if anything is changed. bool PassThroughOperandTypes(OperandRange operands, ResultRange results) { bool changed = false; - for (auto entry : llvm::zip(operands, results)) { + for (auto entry : zip(operands, results)) { Type operand_type = std::get<0>(entry).getType(); - if (operand_type == std::get<1>(entry).getType()) continue; + Type result_type = std::get<1>(entry).getType(); + if (operand_type == result_type) continue; + // Pass through nodes may remove ref types, don't consider that as + // refinement. + // TODO(jpienaar): There could be refinement in addition to this, so + // refine this. + if (operand_type.cast() + .getElementType() + .isa() && + !result_type.cast() + .getElementType() + .isa()) + continue; + std::get<1>(entry).setType(operand_type); changed = true; } @@ -260,14 +290,13 @@ bool InferShapeForCall(Operation* op) { CallInterfaceCallable callable = call_op.getCallableForCallee(); SymbolRefAttr sym = callable.dyn_cast(); if (!sym) return false; - FuncOp func = - dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); + FuncOp func = dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); if (!func) return false; bool changed = false; // Map each of the results of the call to the returned type of the // function. - for (auto result : llvm::zip(op->getResults(), func.getType().getResults())) { + for (auto result : zip(op->getResults(), func.getType().getResults())) { if (std::get<0>(result).getType() == std::get<1>(result)) continue; // Skip already statically shaped results. 
if (!CanBeRefined(std::get<0>(result).getType())) continue; @@ -287,20 +316,293 @@ bool InferShapeForCall(Operation* op) { return changed; } -bool RefineTfConst(TF::ConstOp const_op) { - Type old_type = const_op.getType(); - if (const_op.valueAttr().getType() == old_type) return false; - const_op.getResult().setType(const_op.valueAttr().getType()); - AddCastBackForUnsupportedNonTFUses(const_op, const_op.getResult(), - const_op.getDialect(), old_type); - return true; +bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, + Dialect* tf_dialect) { + Operation* op = infer_ti.getOperation(); + SmallVector inferred; + LogicalResult res = infer_ti.inferReturnTypes( + op->getContext(), op->getLoc(), op->getOperands(), + op->getAttrDictionary(), op->getRegions(), inferred); + if (failed(res)) { + op->emitOpError("failed to refine type as inference failed"); + return false; + } + + if (inferred == op->getResultTypes()) return false; + + // Map each of the results of the call to the returned type of the + // function. + bool changed = false; + for (auto result : zip(op->getResults(), inferred)) { + if (std::get<0>(result).getType() == std::get<1>(result)) continue; + + // Inserts a cast back to the original type if any user is not in the + // TF dialect. + AddCastBackForUnsupportedNonTFUses(op, std::get<0>(result), + op->getDialect(), std::get<1>(result)); + // Finally we inferred the shape and replace the type for this result. + std::get<0>(result).setType(std::get<1>(result)); + changed = true; + } + return changed; } } // namespace -bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, - int64_t graph_version) { - assert(tf_dialect == op->getDialect()); +// Combination of value producer and port of value produced (e.g., +// :, +// so for tf.Const -> tensor<10x20xf32>, [0,2,18] would point to a unique output +// scalar value). +struct ValuePort { + PointerUnion producer; + SmallVector port; + + bool operator==(const ValuePort& other) const { + return producer == other.producer && port == other.port; + } + + // Convert output value to ValuePort. + explicit ValuePort(Value v) { + OpResult opr = v.dyn_cast(); + if (opr) { + producer = opr.getOwner(); + port = {opr.getResultNumber()}; + } else { + producer = v.cast(); + port = {0}; + } + } + ValuePort(PointerUnion producer, + SmallVector port) + : producer(producer), port(port) {} + + raw_ostream& print(raw_ostream& os) const { + if (auto* op = producer.dyn_cast()) + os << "op " << op->getName(); + if (auto ba = producer.dyn_cast()) + os << "block_arg " << ba.getArgNumber(); + os << formatv(" [{0}]", llvm::make_range(port.begin(), port.end())); + return os; + } +}; + +struct ValuePortHasher { + std::size_t operator()(const ValuePort& other) const { + return hash_combine(llvm::hash_value(other.producer.getOpaqueValue()), + hash_value(ArrayRef(other.port))); + } +}; + +using ValuePortResultMap = + std::unordered_map; +using ComputedQueryFn = function_ref; +using ValueQueryFn = function_ref; +using ValuePortInputs = SmallVectorImpl; + +// TODO(jpienaar): ComputeInputsRequiredForOutput and ComputeOutputComponent are +// intended to be switched to op interfaces once more refined. +LogicalResult ComputeInputsRequiredForOutput(ValuePort value_port, + ComputedQueryFn has_been_computed, + ValuePortInputs* inputs) { + auto op = value_port.producer.dyn_cast(); + auto& port = value_port.port; + if (!op) return failure(); + + // No inputs required for constants. 
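+  // (ComputeOutputComponent reads a constant's value straight from its value
+  // attribute, so nothing needs to be queued for it.)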
+ if (matchPattern(op, m_Constant())) return success(); + + // Note: this focusses only on the trivial pack op case and this could be + // generalized. + if (auto pack_op = dyn_cast(op)) { + if (pack_op.getType().cast().getRank() != 1) return failure(); + if (port.size() != 2) return failure(); + assert(port[0] == 0); + ValuePort req(pack_op.getOperand(port[1])); + if (!has_been_computed(req)) inputs->push_back(req); + return success(); + } + + return failure(); +} + +// Computes the output produced by ValuePort using the query function of +// existing computed values. +Attribute ComputeOutputComponent(const ValuePort& value_port, + ValueQueryFn values) { + LLVM_DEBUG(value_port.print(llvm::errs() << "\nComputing output for ")); + + auto op = value_port.producer.dyn_cast(); + if (!op) return nullptr; + auto& port = value_port.port; + + if (port.empty()) { + LLVM_DEBUG(llvm::dbgs() << "skipping, port outside spec of " << op << "\n"); + return nullptr; + } + + ElementsAttr attr; + if (matchPattern(op, m_Constant(&attr))) { + if (port.size() == 1 && port[0] == 0) return attr; + return nullptr; + } + + // Note: this focusses only on the trivial pack op case and this could be + // generalized. + if (auto pack_op = dyn_cast(op)) { + if (pack_op.getType().cast().getRank() != 1) return nullptr; + if (port.size() != 2 || port[0] != 0) return nullptr; + ValuePort op_port(op->getOperand(port[1])); + return values(op_port); + } + return nullptr; +} + +// Context used during ShapeInference. This class contains common information +// that is required by the individual shape inference helper functions (e.g., +// TF Graph version, constant values computed, etc.) +class ShapeInference { + public: + ShapeInference(int64_t graph_version, MLIRContext* context); + + LogicalResult ComputeInputsRequiredForOutput(ValuePort value_port, + ValuePortInputs* inputs) { + return ::mlir::TF::ComputeInputsRequiredForOutput( + value_port, + [this](const ValuePort& port) { + return results_.find(port) != results_.end(); + }, + inputs); + } + + Attribute ComputeOutputComponent(const ValuePort& value_port) { + return ::mlir::TF::ComputeOutputComponent( + value_port, [this](const ValuePort& port) { return results_[port]; }); + } + + // Returns ShapeHandle if the op result could be computed as shape. + ShapeHandle ComputeOutputAsShape(OpResult result, InferenceContext* ic); + + void RecordValue(const ValuePort& value_port, Attribute value) { + results_[value_port] = value; + } + + // Performs shape inference on the provided op and return true if the type of + // at least one result has been changed. + // A tf.Cast() is inserted for any uses that isn't in the TensorFlow dialect. + // `graph_version` indicates the current GraphDef compatibility versions + // (the versions field in graph.proto). + bool InferShapeForSingleOperation(Operation* op); + + // Infers shape on the provided region, including nested ones, iterate until + // fix point with a limit of max_iteration. Returns success if fix point is + // reached before max_iteration. + LogicalResult InferShapeUntilFixPoint(Region* region, + int64_t max_iteration = 10); + + // Updates input types and refine shapes inside body of functions that are + // attached to ControlFlow ops (If/While). These functions include Then/Else + // branches of IfOp and Cond/Body functions of WhileOp. These functions share + // following common properties: + // 1) They are never reused, ie. having a single use in module. 
+ // 2) Their input types match those of their parent ops (excluding inputs + // like predicate). + // Returns a boolean indicating whether any change has been applied. + LogicalResult RefineShapeForControlFlowFunc(FuncOp func, + ArrayRef input_types, + int64_t max_iteration); + + // Propagate the shapes to the functions named. + LogicalResult PropagateShapeToFunctions( + ModuleOp module, Operation::operand_type_range input_types, + ArrayRef func_names, int64_t max_iteration); + + // Shape propagation for call/control flow ops. + LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, + int64_t max_iteration); + + private: + // Mapping between ValuePort (which corresponds to an OpResult or smaller, + // e.g., first element of OpResult produded) to an Attribute if the ValuePort + // corresponds to a constant value. + ValuePortResultMap results_; + int64_t graph_version_; + MLIRContext* context_; + Dialect* tf_dialect_; +}; + +ShapeInference::ShapeInference(int64_t graph_version, MLIRContext* context) + : graph_version_(graph_version) { + context_ = context; + tf_dialect_ = context->getRegisteredDialect(); +} + +ShapeHandle ShapeInference::ComputeOutputAsShape(OpResult result, + InferenceContext* ic) { + LLVM_DEBUG(result.print(llvm::dbgs() << "\nEvaluate partially ")); + auto rt = result.getType().dyn_cast(); + if (!rt || !rt.hasStaticShape() || rt.getRank() != 1) return {}; + int dim_size = rt.getDimSize(0); + + // Worklist to direct partial evaluation. + SmallVector worklist; + + // Simple evaluator that attempts to partially evaluate the input value even + // if unable to evaluate the complete output. Below follows a simple stack + // based evaluation where it queries what operands/part of operands need to + // be evaluated and attempting to partially evaluate those operands. It does + // so by pushing the operands that need to be required on to the worklist + // before enqueuing the operation requiering those values. + std::vector dims(dim_size, ic->UnknownDim()); + for (unsigned int i = 0, e = dims.size(); i != e; ++i) { + LLVM_DEBUG(llvm::dbgs() << "\nConsidering output dim " << i << "\n"); + + worklist.push_back( + ValuePort{result.getOwner(), {result.getResultNumber(), i}}); + while (!worklist.empty()) { + auto front = worklist.pop_back_val(); + LLVM_DEBUG(front.print(llvm::errs() << "\nWorklist front ")); + + SmallVector inputs; + auto res = ComputeInputsRequiredForOutput(front, &inputs); + if (failed(res)) { + // Abort if unable to find which required inputs need to be computed. + worklist.clear(); + break; + } + + if (!inputs.empty()) { + // Enqueue required computation followed by its required operands in + // stack. + worklist.push_back(std::move(front)); + for (auto& it : inputs) worklist.push_back(std::move(it)); + continue; + } + + auto ret = ComputeOutputComponent(front); + if (!ret) continue; + + RecordValue(front, ret); + LLVM_DEBUG(ret.print(llvm::dbgs() << "\ncomputed result = ")); + + // If worklist is empty, then this is the root query op. 
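+      // For example (illustrative), for
+      //   %size = "tf.Pack"(%c0, %c1) : (tensor<i32>, tensor<i32>) -> tensor<2xi32>
+      // where %c0 and %c1 are scalar tf.Const ops, each per-dimension query
+      // bottoms out at one of the constants and its value is folded into
+      // `dims` below; anything that cannot be resolved stays ic->UnknownDim().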
+ if (worklist.empty()) { + LLVM_DEBUG(llvm::dbgs() << "[root node]\n"); + if (auto dea = ret.dyn_cast()) { + if (dea.getNumElements() != 1) { + LLVM_DEBUG(llvm::errs() << "Unexpected number of elements\n"); + return {}; + } + int64_t val = (*dea.getIntValues().begin()).getSExtValue(); + dims[i] = ic->MakeDim(val); + } + } + } + } + return ic->MakeShape(dims); +} + +bool ShapeInference::InferShapeForSingleOperation(Operation* op) { + assert(tf_dialect_ == op->getDialect()); // The shape function of these ops sometimes does not propagate subtypes // (handle shapes) for resource and variant types. We use a simple passthrough // to make sure they are preserved in the output. @@ -312,7 +614,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // If no result for this op needs shape inference, we have a fast-path return. // But if the type is a resource/variant, we do not skip it because we might // not have the handle shapes. - if (llvm::none_of(op->getResultTypes(), CanBeRefined)) { + if (none_of(op->getResultTypes(), CanBeRefined)) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for statically shaped op '" << op->getName() << "'.\n"); return false; @@ -327,8 +629,8 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // This is necessary to avoid reprocessing the tf.Cast that are inserted at // the end of this function. if (isa(op) && - llvm::all_of(op->getResult(0).getUsers(), [&](Operation* user) { - return user->getDialect() != tf_dialect; + all_of(op->getResult(0).getUsers(), [&](Operation* user) { + return user->getDialect() != tf_dialect_; })) { LLVM_DEBUG(llvm::dbgs() << "Skipping inference for tf.Cast with no TF " "dialect operation users '" @@ -408,9 +710,9 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, // Perform the shape inference using an InferenceContext with the input // shapes. This object is abstracting the information that the ShapeInference // function operates on. - tensorflow::shape_inference::InferenceContext c( - graph_version, *node_def, op_reg_data->op_def, input_shapes, - input_tensors, /*input_tensors_as_shapes=*/{}, handle_shapes_and_types); + InferenceContext c(graph_version_, *node_def, op_reg_data->op_def, + input_shapes, input_tensors, + /*input_tensors_as_shapes=*/{}, handle_shapes_and_types); auto status = c.Run(op_reg_data->shape_inference_fn); if (!status.ok()) { LLVM_DEBUG(llvm::dbgs() << "Shape inference error for '" << *op @@ -418,6 +720,43 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, return false; } + // Determine if, during shape computation, the shape functions attempted to + // query an input operand as shape where the input was not known/constant. + bool requires_inputs = + any_of(llvm::seq(0, c.num_inputs()), [&](int input) { + return c.requested_input_tensor_as_partial_shape(input) && + !input_tensors[input]; + }); + if (requires_inputs) { + std::vector input_tensors_as_shapes; + for (int input : llvm::seq(0, c.num_inputs())) { + if (c.requested_input_tensor_as_partial_shape(input) && + !input_tensors[input]) { + auto op_result = op->getOperand(input).dyn_cast(); + if (!op_result) continue; + // Resize on first valid shape computed. + input_tensors_as_shapes.resize(c.num_inputs()); + auto handle = ComputeOutputAsShape(op_result, &c); + LLVM_DEBUG(llvm::dbgs() << "Requested " << input << " as shape " + << (handle.Handle() ? 
"found" : "not found")); + if (handle.Handle()) input_tensors_as_shapes[input] = handle; + } + } + + // Attempt to compute the unknown operands as shapes. + // Note: in the case where no partial outputs could be computed, this would + // be empty. + if (!input_tensors_as_shapes.empty()) { + c.set_input_tensors_as_shapes(input_tensors_as_shapes); + auto status = c.Run(op_reg_data->shape_inference_fn); + if (!status.ok()) { + LLVM_DEBUG(llvm::dbgs() << "Shape inference error for '" << *op + << "': " << status.error_message() << "\n"); + return false; + } + } + } + assert(c.num_outputs() == op->getNumResults() && "inference context matches the MLIR number of results."); @@ -430,12 +769,11 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, if (!CanBeRefined(result.getType())) continue; auto shaped_type = result.getType().cast(); - tensorflow::shape_inference::ShapeHandle shape_handle = c.output(output); + ShapeHandle shape_handle = c.output(output); LLVM_DEBUG(llvm::dbgs() << "Inferred output " << output << " : " << c.DebugString(shape_handle) << "\n"); - auto get_tensor_type = - [&c](const tensorflow::shape_inference::ShapeHandle& sh, - Type element_type) -> TensorType { + auto get_tensor_type = [&c](const ShapeHandle& sh, + Type element_type) -> TensorType { if (!c.RankKnown(sh)) return UnrankedTensorType::get(element_type); // Convert the shape from TensorFlow (int64) to MLIR (int64_t). SmallVector shape; @@ -449,7 +787,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, new_element_type.isa()) { auto handle_shapes_types = c.output_handle_shapes_and_types(output); if (handle_shapes_types) { - llvm::SmallVector subtypes; + SmallVector subtypes; OpBuilder b(op); for (const auto& shape_n_type : *handle_shapes_types) { Type element_type; @@ -469,7 +807,7 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, if (result.getType() == new_type) continue; // Inserts a cast back to the original type if any user is not in the TF // dialect. - AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect, + AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect_, result.getType()); // Finally we inferred the shape and replace the type for this result. result.setType(new_type); @@ -481,23 +819,13 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, return changed; } -// Updates input types and refine shapes inside body of functions that are -// attached to ControlFlow ops (If/While). These functions include Then/Else -// branches of IfOp and Cond/Body functions of WhileOp. These functions share -// following common properties: -// 1) They are never reused, ie. having a single use in module. -// 2) Their input types match those of their parent ops (excluding inputs like -// predicate). -// Returns a boolean indicating whether any change has been applied. 
-LogicalResult RefineShapeForControlFlowFunc(FuncOp func, - llvm::ArrayRef input_types, - int64_t graph_version, - int64_t max_iteration) { +LogicalResult ShapeInference::RefineShapeForControlFlowFunc( + FuncOp func, ArrayRef input_types, int64_t max_iteration) { ModuleOp module = func.getParentOfType(); auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); int num_uses = std::distance(func_uses->begin(), func_uses->end()); if (num_uses != 1) { - func.emitWarning(llvm::formatv( + func.emitWarning(formatv( "expected control flow function {0} to have exactly 1 use, found {1}.", func.getName(), num_uses)); return failure(); @@ -511,8 +839,7 @@ LogicalResult RefineShapeForControlFlowFunc(FuncOp func, arg_and_idx.value().setType(input_types[arg_and_idx.index()]); } - auto res = - InferShapeUntilFixPoint(&func.getBody(), graph_version, max_iteration); + auto res = InferShapeUntilFixPoint(&func.getBody(), max_iteration); if (failed(res)) return res; auto new_return_types = InferShapeForFunctionReturnType(func); @@ -524,20 +851,18 @@ LogicalResult RefineShapeForControlFlowFunc(FuncOp func, return success(); } -LogicalResult PropagateShapeToFunctions( +LogicalResult ShapeInference::PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - llvm::ArrayRef func_names, int64_t graph_version, - int64_t max_iteration) { - bool success = true; + ArrayRef func_names, int64_t max_iteration) { + bool all_succeeded = true; auto types = llvm::to_vector<4>(input_types); for (auto func_name : func_names) { FuncOp func = module.lookupSymbol(func_name); - if (failed(RefineShapeForControlFlowFunc(func, types, graph_version, - max_iteration))) { - success = false; - } + all_succeeded = + succeeded(RefineShapeForControlFlowFunc(func, types, max_iteration)) && + all_succeeded; } - return mlir::success(success); + return success(all_succeeded); } // If the callee has only one use, propagates any constant operand of call_op to @@ -557,7 +882,7 @@ void PropagateConstantToCallee(CallOpInterface call_op, // the constant inside the function. 
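  // For example, a tf.Const passed as a call operand is cloned into the callee
  // and the matching block argument's uses are redirected to the clone, so
  // shape functions inside the callee can fold against the constant value.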
for (auto arg : func.getArguments()) { auto operand = op->getOperand(arg.getArgNumber()).getDefiningOp(); - if (llvm::isa_and_nonnull(operand)) { + if (isa_and_nonnull(operand)) { arg.replaceAllUsesWith(builder.clone(*operand)->getResult(0)); } } @@ -576,33 +901,31 @@ void PropagateConstantFromCallee(CallOpInterface call_op, for (auto retval : llvm::enumerate(func.front().getTerminator()->getOperands())) { auto retval_op = retval.value().getDefiningOp(); - if (llvm::isa_and_nonnull(retval_op)) { + if (isa_and_nonnull(retval_op)) { op->getResult(retval.index()) .replaceAllUsesWith(builder.clone(*retval_op)->getResult(0)); } } } -LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, - int64_t graph_version, - int64_t max_iteration) { +LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( + Operation* op, int64_t max_iteration) { ModuleOp module = op->getParentOfType(); if (auto if_op = dyn_cast(op)) { return PropagateShapeToFunctions( - module, llvm::drop_begin(if_op.getOperandTypes(), 1), - {if_op.then_branch(), if_op.else_branch()}, graph_version, - max_iteration); + module, drop_begin(if_op.getOperandTypes(), 1), + {if_op.then_branch(), if_op.else_branch()}, max_iteration); } else if (auto while_op = dyn_cast(op)) { return PropagateShapeToFunctions(module, while_op.getOperandTypes(), {while_op.cond(), while_op.body()}, - graph_version, max_iteration); + max_iteration); } else if (auto call_op = dyn_cast(op)) { CallInterfaceCallable callable = call_op.getCallableForCallee(); if (SymbolRefAttr sym = callable.dyn_cast()) { PropagateConstantToCallee(call_op, sym, module); if (failed(PropagateShapeToFunctions( module, call_op.getArgOperands().getTypes(), - {sym.getRootReference()}, graph_version, max_iteration))) { + {sym.getRootReference()}, max_iteration))) { return failure(); } PropagateConstantFromCallee(call_op, sym, module); @@ -615,13 +938,10 @@ LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, return success(); } -LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, - int64_t max_iteration) { - MLIRContext* ctx = region->getContext(); - Dialect* tf_dialect = ctx->getRegisteredDialect(); - - // An operation folder that is used to attempt folding before inference. - OperationFolder folder(ctx); +LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, + int64_t max_iteration) { + // An operation folder that is used to attempt folding before inference._ + OperationFolder folder(context_); bool changed = true; // TODO(aminim): we could have a more efficient traversal by guiding the @@ -633,30 +953,29 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, LLVM_DEBUG(llvm::dbgs() << "Shape inference, iteration " << iteration << "\n"); region->walk([&](Operation* op) { - if (op->getDialect() != tf_dialect) { - changed |= InferShapeForNonTFDialectOperation(op, tf_dialect); - return; - } - - if (auto tf_const = dyn_cast(op)) { - changed |= RefineTfConst(tf_const); + if (auto infer_ti = dyn_cast(op)) { + changed |= RefineWithInferTypeOpInterface(infer_ti, tf_dialect_); // TODO(jpienaar): Debug why we can't just return here. We end up with // additional constant due to the propagation of constant into attached // function if we return already. } + if (op->getDialect() != tf_dialect_) { + changed |= InferShapeForNonTFDialectOperation(op, tf_dialect_); + return; + } + // Before attempting inference, just try to fold the operation. 
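      // Folding first keeps ops that reduce to constants (e.g. a tf.Shape of a
      // statically shaped tensor) from ever reaching the TensorFlow shape
      // functions below.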
if (succeeded(folder.tryToFold(op))) return; // Best-effort shape inference in attached functions. Do not return // failure even if it doesn't get to fixed point. - if (failed(PropagateShapeIntoAttachedFunctions(op, graph_version, - max_iteration))) { + if (failed(PropagateShapeIntoAttachedFunctions(op, max_iteration))) { op->emitWarning() << "unable to refine shape of attached function " "arguments and bodies"; } - changed |= InferShapeForSingleOperation(op, tf_dialect, graph_version); + changed |= InferShapeForSingleOperation(op); }); } @@ -671,31 +990,43 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version) { - mlir::FunctionType func_type = func.getType(); + ShapeInference context(graph_version, func.getContext()); + if (arg_shapes.empty()) { + if (failed(context.InferShapeUntilFixPoint(&func.getBody()))) + return failure(); + // TODO(b/156276510): Verify that it is always fine to refine a function's + // return type, as long as we do not change the argument shapes. + if (auto return_types = InferShapeForFunctionReturnType(func)) { + func.setType(FunctionType::get(func.getType().getInputs(), + return_types.getValue(), + func.getContext())); + } + + return success(); + } + FunctionType func_type = func.getType(); bool needs_refinement = false; - llvm::SmallVector new_arg_types; + SmallVector new_arg_types; new_arg_types.reserve(func_type.getNumInputs()); // Update argument types in-place using the provided arg_shapes. for (size_t i = 0; i < func_type.getNumInputs(); ++i) { ArrayRef shape = arg_shapes[i]; - mlir::Type element_type; - if (auto input_ty = - func_type.getInput(i).dyn_cast()) { + Type element_type; + if (auto input_ty = func_type.getInput(i).dyn_cast()) { if (!input_ty || input_ty.getShape().size() != shape.size()) { return failure(); } element_type = input_ty.getElementType(); } else { - auto unranked_input_ty = - func_type.getInput(i).dyn_cast(); + auto unranked_input_ty = func_type.getInput(i).dyn_cast(); if (!unranked_input_ty) { return failure(); } element_type = unranked_input_ty.getElementType(); } - auto new_arg_type = mlir::RankedTensorType::get(shape, element_type); + auto new_arg_type = RankedTensorType::get(shape, element_type); if (new_arg_type != func_type.getInput(i)) { // If the new type is more detailed, trigger shape inference. func.getArgument(i).setType(new_arg_type); @@ -708,28 +1039,17 @@ LogicalResult InferShapeForFunction(FuncOp func, return success(); } - mlir::LogicalResult result = - mlir::TF::InferShapeUntilFixPoint(&func.getBody(), graph_version); + LogicalResult result = context.InferShapeUntilFixPoint(&func.getBody()); if (failed(result)) { return failure(); } auto return_types = InferShapeForFunctionReturnType(func); - func.setType(mlir::FunctionType::get(new_arg_types, - return_types.hasValue() - ? return_types.getValue() - : func.getType().getResults(), - func.getContext())); - - return success(); -} - -LogicalResult InferShapeForFunctionType(FuncOp func) { - if (auto return_types = InferShapeForFunctionReturnType(func)) { - func.setType(mlir::FunctionType::get(func.getType().getInputs(), - return_types.getValue(), - func.getContext())); - } + func.setType(FunctionType::get(new_arg_types, + return_types.hasValue() + ? 
return_types.getValue() + : func.getType().getResults(), + func.getContext())); return success(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h index 0524ec678ed..e36d8d56d6d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h @@ -27,30 +27,13 @@ namespace mlir { namespace TF { -// Performs shape inference on the provided op and return true if the type of -// at least one result has been changed. -// A tf.Cast() is inserted for any uses that isn't in the TensorFlow dialect. -// `graph_version` indicates the current GraphDef compatibility versions -// (the versions field in graph.proto). -bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, - int64_t graph_version); - -// Infers shape on the provided region, including nested ones, iterate until fix -// point with a limit of max_iteration. Returns success if fix point is reached -// before max_iteration. -LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, - int64_t max_iteration = 10); - // Given a list of refined shapes matching the function arguments of func, runs // shape inference over the function to propagate this updated information. +// If arg_shapes are empty, then argument shapes will be left unchanged. LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version); -// Refines the return type of the given function by folding tf.Cast that -// precedes the return instruction. -LogicalResult InferShapeForFunctionType(FuncOp func); - } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc index 48e4e77ce0f..acdfc0eb039 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc @@ -58,10 +58,8 @@ struct ShapeInference } int64_t producer = producer_or.ValueOrDie(); for (auto func : module.getOps()) { - InferShapeUntilFixPoint(&func.getBody(), producer); - // TODO(yuanzx): Verify that it is always fine to refine a function's - // return type, as long as we do not change the argument shapes. - InferShapeForFunctionType(func); + if (failed(InferShapeForFunction(func, /*arg_shapes=*/{}, producer))) + return signalPassFailure(); } } }; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc b/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc index 0eafdea0964..e62df78ed11 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/sink_constant.cc @@ -41,15 +41,15 @@ using ::mlir::TF::ConstOp; class ExecutorConstantSinking : public mlir::PassWrapper { void runOnFunction() override { - getFunction().walk([](tf_device::LaunchOp launch) { - LLVM_DEBUG(llvm::dbgs() << "Visit " << *launch.getOperation() << "\n"); + getFunction().walk([](tf_device::ClusterOp cluster) { + LLVM_DEBUG(llvm::dbgs() << "Visit " << *cluster.getOperation() << "\n"); // For each launch op, we find the values used that come from a constant // defined above and sink these constants in the region body. // The sunk_constant map keeps a mapping from a ConstOp defined above to // a sunk clone of it. 
This allows for reusing a sunk constant with // multiple uses in the region. llvm::DenseMap sunk_constant; - Region &body = launch.body(); + Region &body = cluster.body(); visitUsedValuesDefinedAbove(body, [&](OpOperand *use) { Value constant = use->get(); auto const_op = dyn_cast_or_null(constant.getDefiningOp()); @@ -84,7 +84,7 @@ class ExecutorConstantSinking static mlir::PassRegistration pass( "tf-device-constant-sinking", - "Sink constants implicitly captured in a tf_device.launch region. This " + "Sink constants implicitly captured in a tf_device.cluster region. This " "reduces the number of arguments when outlining later."); } // anonymous namespace diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc new file mode 100644 index 00000000000..786c4b74b34 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc @@ -0,0 +1,65 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h" + +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" + +namespace mlir { +namespace TF { + +namespace { + +struct FuseParallelMapAndBatch : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(BatchDatasetV2Op op, + PatternRewriter &rewriter) const override { + auto batchInputDataset = op.input_dataset(); + + ParallelMapDatasetOp batchInputOp = dyn_cast_or_null( + batchInputDataset.getDefiningOp()); + if (!batchInputOp) return failure(); + + // The type of the `num_parallel_calls` argument in ParallelMapDataset + // and MapAndBatchDataset is different (int32 and int64 respectively) + auto num_parallel_calls_op = rewriter.create( + op.getLoc(), UnrankedTensorType::get(rewriter.getIntegerType(64)), + batchInputOp.num_parallel_calls(), rewriter.getBoolAttr(false)); + + auto fused_op = rewriter.create( + op.getLoc(), op.getType(), batchInputOp.input_dataset(), + batchInputOp.other_arguments(), op.batch_size(), + num_parallel_calls_op.y(), op.drop_remainder(), batchInputOp.f(), + op.output_types(), op.output_shapes(), + batchInputOp.preserve_cardinality()); + rewriter.replaceOp(op, {fused_op.handle()}); + return failure(); + } +}; + +#include "tensorflow/compiler/mlir/tensorflow/transforms/generated_tf_data_optimization.inc" +} // namespace + +void PopulateTFDataOptimizationPatterns(MLIRContext *context, + OwningRewritePatternList *patterns) { + patterns->insert(context); + populateWithGenerated(context, patterns); +} + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h 
b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h new file mode 100644 index 00000000000..ffbc06a9515 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_TF_DATA_OPTIMIZATION_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_TF_DATA_OPTIMIZATION_H_ + +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project + +namespace mlir { +namespace TF { + +// Populates patterns to perform optimizations specific to tf.data operations. +void PopulateTFDataOptimizationPatterns(MLIRContext *context, + OwningRewritePatternList *patterns); + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_TF_DATA_OPTIMIZATION_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.td b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.td new file mode 100644 index 00000000000..4b4239679b2 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.td @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +include "mlir/IR/OpBase.td" +include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" + +// TODO(jpienaar): Move this somewhere general. 
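+// GetI64ScalarElementsAttr<value> materializes a rank-0 (scalar) i64
+// DenseElementsAttr holding `value`; it is used below to build the
+// num_parallel_calls operand of the fused MapAndBatchDataset.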
+class GetI64ScalarElementsAttr : + NativeCodeCall<"DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getIntegerType(64)), " # value # ")">; + +def FuseMapAndBatch : Pat< + (TF_BatchDatasetV2Op + (TF_MapDatasetOp $input_dataset, $other_arguments, $f, $output_types, + $output_shapes, $use_inter_op_parallelism, $preserve_cardinality), + $batch_size, $drop_remainder, $parallel_copy, $batch_output_types, + $batch_output_shapes), + (TF_MapAndBatchDatasetOp $input_dataset, $other_arguments, $batch_size, + (TF_ConstOp (GetI64ScalarElementsAttr<1>)), $drop_remainder, $f, + $batch_output_types, $batch_output_shapes, $preserve_cardinality)>; + diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization_pass.cc new file mode 100644 index 00000000000..5be69bddb11 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization_pass.cc @@ -0,0 +1,40 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h" + +namespace mlir { +namespace TF { +namespace { + +// Perform tf.data optimizations. +struct TFDataOptimization + : public PassWrapper { + void runOnFunction() override { + OwningRewritePatternList patterns; + mlir::TF::PopulateTFDataOptimizationPatterns(&getContext(), &patterns); + + applyPatternsAndFoldGreedily(getFunction(), patterns); + } +}; + +} // namespace +} // namespace TF +} // namespace mlir + +static mlir::PassRegistration pass( + "tf-data-optimization", "Performs tf.data optimizations"); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc index 0571701413a..6ea6df38568 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc @@ -14,9 +14,9 @@ limitations under the License. ==============================================================================*/ // This transformation pass takes ops with the same `_tpu_replicate` attribute -// in a block and clusters them together under a `tf_device::LaunchOp`. +// in a block and clusters them together under a `tf_device.cluster`. // Associated TPUReplicateMetadata ops are removed and its attributes are copied -// over to the associated `tf_device::LaunchOp`. If a cluster should be +// over to the associated `tf_device.cluster`. If a cluster should be // replicated, the associated `tf_device::LaunchOp` will be wrapped further with // a `tf_device.replicate`. 
This pass also assumes ops of the same cluster do // not have ops outside of the cluster that are both operands and results of the @@ -179,7 +179,7 @@ llvm::SmallSetVector CollectClusterPrecedingUsers( // Collects results and associated types of the cluster that are used outside of // the cluster. These results and types are used to create the clusters -// `tf_device::LaunchOp` and associated terminator. Results that have no uses +// `tf_device.cluster` and associated terminator. Results that have no uses // outside of the cluster (i.e. results of ops in the cluster are only consumed // by other ops in the cluster) are pruned. llvm::SmallVector CollectClusterResults( @@ -201,40 +201,37 @@ llvm::SmallVector CollectClusterResults( return results; } -// Creates a `tf_device::LaunchOp` to wrap cluster ops. -tf_device::LaunchOp CreateLaunchOpForCluster(Operation* last_cluster_op, - llvm::ArrayRef results) { - // `tf_device::LaunchOp` will be placed at where the last op of the cluster - // is. +// Creates a `tf_device.cluster` to wrap cluster ops. +tf_device::ClusterOp CreateOpForCluster(Operation* last_cluster_op, + llvm::ArrayRef results) { + // `tf_device.cluster` will be placed at where the last op of the cluster is. OpBuilder builder(last_cluster_op); llvm::SmallVector result_types; for (Value result : results) result_types.push_back(result.getType()); - // An empty string placeholder is used for the device as that will be later - // populated with the device of the associated TPUReplicateMetadata op. - auto launch_op = builder.create( - last_cluster_op->getLoc(), builder.getStringAttr(""), result_types); + auto cluster = builder.create(last_cluster_op->getLoc(), + result_types); - launch_op.body().push_back(new Block); + cluster.body().push_back(new Block); // Add terminator. - builder.setInsertionPointToEnd(&launch_op.GetBody()); + builder.setInsertionPointToEnd(&cluster.GetBody()); builder.create(last_cluster_op->getLoc(), results); - return launch_op; + return cluster; } -// Moves cluster ops to associated `tf_device.LaunchOp` body. -void MoveClusterOpsToLaunchOp( - tf_device::LaunchOp launch_op, +// Moves cluster ops to associated `tf_device.cluster` body. +void MoveClusterOpsToCluster( + tf_device::ClusterOp cluster, const llvm::SmallSetVector& cluster_ops) { - MLIRContext* context = launch_op.getContext(); - Operation* terminator = &launch_op.GetBody().back(); + MLIRContext* context = cluster.getContext(); + Operation* terminator = cluster.GetBody().getTerminator(); for (Operation* cluster_op : cluster_ops) { // Remove `_tpu_replicate` and `device` attribute from ops in the cluster - // as that information will be present in the `tf_device.LaunchOp`. + // as that information will be present in the `tf_device.cluster`. cluster_op->removeAttr(Identifier::get(kTPUReplicateAttr, context)); cluster_op->removeAttr(Identifier::get(kDeviceAttr, context)); cluster_op->moveBefore(terminator); @@ -242,24 +239,24 @@ void MoveClusterOpsToLaunchOp( } // Replaces uses of cluster ops results outside of cluster with the associated -// `tf_device::LaunchOp` results. -void UpdateLaunchOpResultExternalUses(tf_device::LaunchOp launch_op, - llvm::ArrayRef results) { - Block& launch_op_block = launch_op.GetBody(); - for (auto ret_vals : llvm::zip(results, launch_op.getResults())) { +// `tf_device.cluster` results. 
+void UpdateClusterResultExternalUses(tf_device::ClusterOp cluster, + llvm::ArrayRef results) { + Block& cluster_block = cluster.GetBody(); + for (auto ret_vals : llvm::zip(results, cluster.getResults())) { Value old_ret = std::get<0>(ret_vals); Value new_ret = std::get<1>(ret_vals); for (auto& use : llvm::make_early_inc_range(old_ret.getUses())) - if (!launch_op_block.findAncestorOpInBlock(*use.getOwner())) + if (!cluster_block.findAncestorOpInBlock(*use.getOwner())) use.set(new_ret); } } // Moves users of cluster that are before the cluster to after the cluster. -void MovePrecedingClusterUsers(tf_device::LaunchOp launch_op, +void MovePrecedingClusterUsers(tf_device::ClusterOp cluster, llvm::ArrayRef preceding_users) { - Operation* op_after_launch_op = launch_op.getOperation()->getNextNode(); - for (Operation* user : preceding_users) user->moveBefore(op_after_launch_op); + Operation* op_after_cluster = cluster.getOperation()->getNextNode(); + for (Operation* user : preceding_users) user->moveBefore(op_after_cluster); } // Sorts `tf.TPUReplicatedInput` ops by `index` attribute. Ops with an `index` @@ -297,19 +294,18 @@ LogicalResult SortTPUReplicatedInputsByIndex( // Creates a `tf_device.replicate` to represent replication for the cluster, if // necessary. -LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, - int num_replicas) { +LogicalResult ReplicateCluster(tf_device::ClusterOp cluster, int num_replicas) { // No need to replicate. if (num_replicas == 1) return success(); if (num_replicas < 1) - return launch_op.emitError() << "requires '" << kNumReplicasAttr - << "' int attribute to be at least 1"; + return cluster.emitError() << "requires '" << kNumReplicasAttr + << "' int attribute to be at least 1"; // Collect all used TPUReplicatedInput ops and sort by `index`. llvm::SmallSetVector unique_replicated_input_ops; mlir::visitUsedValuesDefinedAbove( - launch_op.body(), launch_op.body(), [&](mlir::OpOperand* operand) { + cluster.body(), cluster.body(), [&](mlir::OpOperand* operand) { Operation* def = operand->get().getDefiningOp(); if (def && llvm::isa(def)) unique_replicated_input_ops.insert(def); @@ -339,24 +335,24 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, } // Create replicate op. - OpBuilder builder(launch_op); + OpBuilder builder(cluster); auto replicate_op = builder.create( - launch_op.getLoc(), num_replicas, + cluster.getLoc(), num_replicas, llvm::SmallDenseMap>(), - replicated_inputs, launch_op.getResultTypes()); + replicated_inputs, cluster.getResultTypes()); if (!mirrored_variable_indices.empty()) replicate_op.setAttr(kMirroredVariableIndicesAttr, builder.getI64ArrayAttr(mirrored_variable_indices)); // Replace replicated cluster results with replicate op results. 
- for (auto result_and_idx : llvm::enumerate(launch_op.getResults())) { + for (auto result_and_idx : llvm::enumerate(cluster.getResults())) { Value result = result_and_idx.value(); int idx = result_and_idx.index(); for (auto& use : result.getUses()) { Operation* def = use.getOwner(); if (!def || !llvm::isa(def)) - return launch_op.emitError() - << "requires output of " << launch_op.getOperationName() + return cluster.emitError() + << "requires output of " << cluster.getOperationName() << " to lead to a 'tf.TPUReplicatedOutput' op"; if (def->getNumResults() != num_replicas) @@ -375,14 +371,15 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, Operation* input = std::get<0>(input_and_block_arg); Value block_arg = std::get<1>(input_and_block_arg); mlir::replaceAllUsesInRegionWith(input->getResult(0), block_arg, - launch_op.body()); + cluster.body()); } - // Create terminator for replicate op and move launch into replicate. + // Create terminator for replicate op and move `tf_device.cluster` into + // replicate. builder.setInsertionPointToEnd(&replicate_op.GetBody()); auto return_op = builder.create(replicate_op.getLoc(), - launch_op.getResults()); - launch_op.getOperation()->moveBefore(return_op); + cluster.getResults()); + cluster.getOperation()->moveBefore(return_op); return success(); } @@ -396,31 +393,33 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, // `_tpu_replicate` attribute. // 2. Find users not in cluster that are interleaved between cluster ops. // 3. Find external uses of cluster ops. -// 4. Create `tf_device::LaunchOp` with results consisting of the external -// uses of cluster ops determined at 3. -// 5. Move cluster ops to `tf_device::LaunchOp` body. -// 6. Replace external uses of cluster ops uses with `tf_device::LaunchOp` +// 4. Create `tf_device.cluster` with results consisting of the external uses +// of cluster ops determined at 3. +// 5. Move cluster ops to `tf_device.cluster` body. +// 6. Replace external uses of cluster ops uses with `tf_device.cluster` // results. -// 7. Move users from 2 to after the `tf_device::LaunchOp`. -// 8. Wrap cluster (`tf_device::LaunchOp`) in a `tf_device.replicate` if +// 7. Move users from 2 to after the `tf_device.cluster`. +// 8. Wrap cluster (`tf_device.cluster`) in a `tf_device.replicate` if // attribute `num_replicas` is greater than 1. -// 9. Copy over TPUReplicateMetadata attributes to `tf_device::LaunchOp`. +// 9. Copy over TPUReplicateMetadata attributes to `tf_device.cluster`. LogicalResult FormClustersInBlock(Block* block, const MetadataMap& metadata_map) { ClusterMap clusters; LogicalResult result = CollectAndGroupClusterOps(block, &clusters); if (failed(result)) return result; - for (const auto& cluster : clusters) { - const auto& cluster_ops = cluster.getSecond(); + for (const auto& cluster_metadata_and_ops : clusters) { + const auto& cluster_ops = cluster_metadata_and_ops.getSecond(); - auto cluster_metadata = metadata_map.find(cluster.getFirst()); + auto cluster_metadata = + metadata_map.find(cluster_metadata_and_ops.getFirst()); // No TPUReplicateMetadata for a `_tpu_replicate` attribute. 
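    // For example, ops tagged _tpu_replicate = "cluster0" with no matching
    // TPUReplicateMetadata op in the block are left unclustered; only the
    // warning below is emitted.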
if (cluster_metadata == metadata_map.end()) { cluster_ops.front()->emitWarning() << "TPUReplicateMetadata for associated '" << kTPUReplicateAttr - << "' attribute '" << cluster.getFirst() << "' is missing"; + << "' attribute '" << cluster_metadata_and_ops.getFirst() + << "' is missing"; continue; } @@ -430,28 +429,28 @@ LogicalResult FormClustersInBlock(Block* block, llvm::SmallVector results = CollectClusterResults(block, cluster_ops); - tf_device::LaunchOp launch_op = - CreateLaunchOpForCluster(cluster_ops.back(), results); + tf_device::ClusterOp cluster = + CreateOpForCluster(cluster_ops.back(), results); - MoveClusterOpsToLaunchOp(launch_op, cluster_ops); + MoveClusterOpsToCluster(cluster, cluster_ops); - UpdateLaunchOpResultExternalUses(launch_op, results); + UpdateClusterResultExternalUses(cluster, results); - MovePrecedingClusterUsers(launch_op, preceding_users.getArrayRef()); + MovePrecedingClusterUsers(cluster, preceding_users.getArrayRef()); auto num_replicas = cluster_metadata->getSecond().get(kNumReplicasAttr); if (!num_replicas || !num_replicas.isa()) - return launch_op.emitError() + return cluster.emitError() << "requires '" << kNumReplicasAttr << "' int attribute"; if (failed(ReplicateCluster( - launch_op, num_replicas.cast().getInt()))) + cluster, num_replicas.cast().getInt()))) return failure(); - // Copy TPUReplicateMetadata attributes to launch. - launch_op.setAttrs(cluster_metadata->second); + // Copy TPUReplicateMetadata attributes to `tf_device.cluster`. + cluster.setAttrs(cluster_metadata->second); // Exclude `num_replicas` as cluster should be replicated if necessary. - launch_op.removeAttr(kNumReplicasAttr); + cluster.removeAttr(kNumReplicasAttr); } return success(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc index ad80eaaf1a6..64af2eabd3d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_padding_mapper.cc @@ -43,7 +43,7 @@ namespace TFTPU { constexpr char kPaddingMapAttr[] = "padding_map"; // This pass remaps and assigns padding maps to an encapsulated function's -// arguments from a `tf_device.launch_func` `padding_map` attribute. Remapping +// arguments from a `tf_device.cluster_func` `padding_map` attribute. Remapping // is from replicated input index to encapsulated function's operand index // (user). @@ -54,13 +54,13 @@ struct TPUDynamicPaddingMapper }; // Creates a mapping from replicated input index (in `tf_device.replicate` op) -// to `tf_device.launch_func` operand index. +// to `tf_device.cluster_func` operand index. 
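+// For example, if replicate block argument 2 is passed as cluster_func
+// operand 0, the returned map contains {2 -> 0}; operands that are not
+// replicate block arguments are simply omitted.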
llvm::SmallDenseMap GetRemappedReplicatedInputIndices( - tf_device::LaunchFuncOp launch_func, tf_device::ReplicateOp replicate) { + tf_device::ClusterFuncOp cluster_func, tf_device::ReplicateOp replicate) { Block* replicate_block = &replicate.GetBody(); llvm::SmallDenseMap remapped_indices; - for (auto operand_and_idx : llvm::enumerate(launch_func.getOperands())) + for (auto operand_and_idx : llvm::enumerate(cluster_func.getOperands())) if (auto block_arg = operand_and_idx.value().dyn_cast()) if (block_arg.getOwner() == replicate_block) remapped_indices[block_arg.getArgNumber()] = operand_and_idx.index(); @@ -68,11 +68,12 @@ llvm::SmallDenseMap GetRemappedReplicatedInputIndices( return remapped_indices; } -// Extracts `padding_map` from `tf_device.launch_func` and remaps the associated -// replicated input indices to the encapsulated function operand indices. An -// error will be returned if an index is not found or parsing failed. +// Extracts `padding_map` from `tf_device.cluster_func` and remaps the +// associated replicated input indices to the encapsulated function operand +// indices. An error will be returned if an index is not found or parsing +// failed. LogicalResult GetRemappedPaddings( - tf_device::LaunchFuncOp launch_func, int num_replicated_args, + tf_device::ClusterFuncOp cluster_func, int num_replicated_args, const llvm::SmallDenseMap& remapped_indices, llvm::SmallVectorImpl* remapped_paddings) { auto bad_index_msg = [num_replicated_args](int32_t index, @@ -85,12 +86,12 @@ LogicalResult GetRemappedPaddings( .str(); }; - Attribute padding_map_attr = launch_func.getAttr(kPaddingMapAttr); + Attribute padding_map_attr = cluster_func.getAttr(kPaddingMapAttr); if (!padding_map_attr) return success(); auto padding_map = padding_map_attr.dyn_cast(); if (!padding_map) - return launch_func.emitOpError() + return cluster_func.emitOpError() << "requires '" << kPaddingMapAttr << "' array attribute"; for (auto padding_attr_and_idx : llvm::enumerate(padding_map)) { @@ -98,25 +99,25 @@ LogicalResult GetRemappedPaddings( auto& padding_attr = padding_attr_and_idx.value(); auto padding = padding_attr.dyn_cast(); if (!padding) - return launch_func.emitOpError( + return cluster_func.emitOpError( llvm::formatv("bad '{0}' attribute at index {1}, not a string", kPaddingMapAttr, padding_attr_and_idx.index())); tensorflow::tpu::PaddingMap padding_proto; if (!padding_proto.ParseFromString(padding.getValue().str())) - return launch_func.emitOpError(llvm::formatv( + return cluster_func.emitOpError(llvm::formatv( "bad '{0}' attribute at index {1}, failed to parse '{2}' as " "tensorflow::tpu::PaddingMap", kPaddingMapAttr, idx, padding.getValue())); const int32_t arg_index = padding_proto.arg_index(); if (arg_index >= num_replicated_args || arg_index < 0) - return launch_func.emitOpError() + return cluster_func.emitOpError() << bad_index_msg(idx, "arg_index", arg_index); const int32_t padding_arg_index = padding_proto.padding_arg_index(); if (padding_arg_index >= num_replicated_args || padding_arg_index < 0) - return launch_func.emitOpError() + return cluster_func.emitOpError() << bad_index_msg(idx, "padding_arg_index", padding_arg_index); auto arg_index_it = remapped_indices.find(arg_index); @@ -125,7 +126,7 @@ LogicalResult GetRemappedPaddings( auto padding_arg_index_it = remapped_indices.find(padding_arg_index); if (padding_arg_index_it == remapped_indices.end()) { - launch_func.emitWarning(llvm::formatv( + cluster_func.emitWarning(llvm::formatv( "bad '{0}' attribute at index {1}, unused 
padding_arg_index {2}", kPaddingMapAttr, idx, padding_arg_index)); continue; @@ -169,22 +170,21 @@ void AnnotateFunctionArgumentsWithPaddings( } } -LogicalResult RemapAndAssignPaddingMaps(tf_device::LaunchFuncOp launch_func, +LogicalResult RemapAndAssignPaddingMaps(tf_device::ClusterFuncOp cluster_func, SymbolTable* symbol_table) { - auto replicate = - llvm::dyn_cast_or_null(launch_func.getParentOp()); + auto replicate = cluster_func.getParentOfType(); // LaunchFunc is not replicated, there will be no padding. if (!replicate) return success(); const int num_replicated_args = replicate.GetBody().getNumArguments(); - auto func = symbol_table->lookup(launch_func.func()); + auto func = symbol_table->lookup(cluster_func.func()); if (!func) return success(); llvm::SmallDenseMap remapped_indices = - GetRemappedReplicatedInputIndices(launch_func, replicate); + GetRemappedReplicatedInputIndices(cluster_func, replicate); llvm::SmallVector remapped_paddings; - if (failed(GetRemappedPaddings(launch_func, num_replicated_args, + if (failed(GetRemappedPaddings(cluster_func, num_replicated_args, remapped_indices, &remapped_paddings))) return failure(); @@ -196,8 +196,8 @@ LogicalResult RemapAndAssignPaddingMaps(tf_device::LaunchFuncOp launch_func, void TPUDynamicPaddingMapper::runOnOperation() { ModuleOp module = getOperation(); SymbolTable symbol_table(module); - module.walk([&](tf_device::LaunchFuncOp launch_func) { - RemapAndAssignPaddingMaps(launch_func, &symbol_table); + module.walk([&](tf_device::ClusterFuncOp cluster_func) { + RemapAndAssignPaddingMaps(cluster_func, &symbol_table); }); } } // anonymous namespace diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc new file mode 100644 index 00000000000..b9e214470cd --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_head_tail_outside_compilation.cc @@ -0,0 +1,231 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Transforms/RegionUtils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/device_util.h" + +namespace mlir { +namespace TFTPU { + +// This pass extracts a CPU computation cluster with `_xla_outside_compilation` +// annotation from the head or tail of a TPU cluster. + +namespace { + +constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; + +bool HasOutsideCompilationAttribute(Operation* op) { + return op->getAttrOfType(kXlaOutsideCompilationAttr) != nullptr; +} + +// Returns whether all operands of `op` are from values inside the +// `input_value_set`. +bool OpContainsOperandsFromSet(Operation* op, + const llvm::SetVector& input_value_set) { + for (auto operand : op->getOperands()) + if (input_value_set.count(operand) == 0) return false; + + return true; +} + +void RecordOutsideCompiledOpsAndUsages( + Operation* op, llvm::SmallSetVector* outside_compiled_ops, + llvm::SetVector* outside_compiled_op_usages) { + if (HasOutsideCompilationAttribute(op) && + OpContainsOperandsFromSet(op, *outside_compiled_op_usages)) { + outside_compiled_ops->insert(op); + outside_compiled_op_usages->insert(op->getResults().begin(), + op->getResults().end()); + } +} + +// Traverses the MLIR graph and returns a set of ops that +// are connected to inputs of TPU computation and outside compiled. +void ExtractOutsideCompiledOpsConnectedToHead( + Value input_value, llvm::SetVector* values_used_in_host_cluster, + llvm::SmallSetVector* outside_compiled_ops) { + llvm::SmallSetVector parent_outside_compiled_ops_at_head; + for (auto& usage : input_value.getUses()) { + auto head_operation = usage.getOwner(); + RecordOutsideCompiledOpsAndUsages(head_operation, + &parent_outside_compiled_ops_at_head, + values_used_in_host_cluster); + } + + // Traverse the graph and find all outside compiled ops connected from + // the `input_value`. + while (!parent_outside_compiled_ops_at_head.empty()) { + llvm::SmallSetVector connected_outside_compiled_ops; + for (auto head_outside_compiled_op : parent_outside_compiled_ops_at_head) { + auto op_results = head_outside_compiled_op->getOpResults(); + for (auto op_result : op_results) { + for (auto& use : op_result.getUses()) { + auto connected_op = use.getOwner(); + RecordOutsideCompiledOpsAndUsages(connected_op, + &connected_outside_compiled_ops, + values_used_in_host_cluster); + } + } + } + + outside_compiled_ops->insert(parent_outside_compiled_ops_at_head.begin(), + parent_outside_compiled_ops_at_head.end()); + std::swap(parent_outside_compiled_ops_at_head, + connected_outside_compiled_ops); + } +} + +// TODO(hongjunchoi): Also handle ops without inputs that are outside +// compiled. 
+// +// Returns set of ops that are outside compiled and are directly connected +// to inputs to the TPU computation. +llvm::SmallSetVector IdentifyOutsideCompiledOpsAtHead( + tf_device::ClusterOp tpu_cluster) { + llvm::SmallSetVector outside_compiled_at_head_ops; + llvm::SetVector values_used_in_cluster; + auto& cluster_region = tpu_cluster.body(); + getUsedValuesDefinedAbove(cluster_region, cluster_region, + values_used_in_cluster); + + auto input_value_list = llvm::to_vector<8>(values_used_in_cluster); + for (auto input_value : input_value_list) + ExtractOutsideCompiledOpsConnectedToHead( + input_value, &values_used_in_cluster, &outside_compiled_at_head_ops); + return outside_compiled_at_head_ops; +} + +// Returns output values of extracted outside compiled cluster at head that +// are used by the TPU computation. +llvm::SmallVector GetHeadExtractedClusterOutputs( + const llvm::SmallSetVector& head_outside_compiled_ops) { + llvm::SmallVector outputs; + outputs.reserve(head_outside_compiled_ops.size()); + + for (auto op : head_outside_compiled_ops) { + for (Operation* user : op->getUsers()) { + if (!head_outside_compiled_ops.count(user)) { + outputs.append(op->result_begin(), op->result_end()); + break; + } + } + } + + return outputs; +} + +// Creates new tf_device.launch op with outside compiled ops extracted +// from the head of TPU computation. +llvm::Optional IsolateHeadExtractedOpsToLaunchOp( + OpBuilder* builder, tf_device::ClusterOp cluster, + const llvm::SmallSetVector& head_outside_compiled_ops) { + if (head_outside_compiled_ops.empty()) + return llvm::Optional(); + + // Create tf_device.launch op to separate all extracted outside compiled ops + // before the tf_device.cluster. + auto output_values = + GetHeadExtractedClusterOutputs(head_outside_compiled_ops); + + llvm::SmallVector output_return_types; + output_return_types.reserve(output_values.size()); + for (auto output : output_values) + output_return_types.emplace_back(output.getType()); + + builder->setInsertionPoint(cluster); + auto host_launch_op = builder->create( + cluster.getLoc(), builder->getStringAttr(""), output_return_types); + + // Replace all usages of outside compiled ops that are used in TPU + // computation with the results of the above created launch op. + for (auto output_and_index : llvm::enumerate(output_values)) { + auto output_index = output_and_index.index(); + auto output = output_and_index.value(); + for (auto& use : output.getUses()) { + if (!head_outside_compiled_ops.count(use.getOwner())) + use.set(host_launch_op.getResult(output_index)); + } + } + + // Create terminator op for the newly created launch op. + host_launch_op.body().push_back(new Block()); + builder->setInsertionPointToEnd(&host_launch_op.GetBody()); + auto terminator = builder->create( + host_launch_op.getLoc(), output_values); + + // Move all outside compile ops from cluster op to launch op. + for (auto outside_compiled_op : head_outside_compiled_ops) + outside_compiled_op->moveBefore(terminator); + + return host_launch_op; +} + +struct TPUExtractHeadTailOutsideCompilation + : public PassWrapper> { + void runOnOperation() override; +}; + +void TPUExtractHeadTailOutsideCompilation::runOnOperation() { + // Get runtime devices information from the closest parent module. 
+ auto module = getOperation(); + mlir::TF::RuntimeDevices devices; + if (failed(tensorflow::GetDevicesFromOp(module, &devices))) + return signalPassFailure(); + + OpBuilder builder(&getContext()); + module.walk([&](tf_device::ClusterOp cluster) { + auto head_outside_compiled_ops = IdentifyOutsideCompiledOpsAtHead(cluster); + IsolateHeadExtractedOpsToLaunchOp(&builder, cluster, + head_outside_compiled_ops); + + // TODO(b/156030523): Update device attribute of newly created host launch + // op as well as enclosing Replicate op (if TPU computation is replicated) + // with host device names. + + // TODO(b/155115766): Implement tail outside compiled op extraction. + }); +} + +} // anonymous namespace + +std::unique_ptr> +CreateTPUExtractHeadTailOutsideCompilationPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tf-tpu-extract-head-tail-outside-compilation", + "Extracts TPU head or tail outside compilation to separate " + "parallel_execute."); + +} // namespace TFTPU +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index 4e20cd9d64b..4281b85bd7f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -34,7 +34,7 @@ constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; constexpr char kDeviceAttr[] = "device"; // Mapping for `_xla_outside_compilation` attribute to ops of a cluster. -using ClusterMap = +using OutsideClusterMap = llvm::SmallDenseMap, 8>; // This pass extracts a CPU computation cluster with `_xla_outside_compilation` @@ -51,7 +51,8 @@ struct TPUExtractOutsideCompilation // Collects and clusters ops in `block` with the same `_xla_outside_compilation` // attribute into `clusters` This returns an error if a // `_xla_outside_compilation` attribute of an op is empty. -LogicalResult CollectAndGroupClusterOps(Block* block, ClusterMap* clusters) { +LogicalResult CollectAndGroupOutsideClusterOps(Block* block, + OutsideClusterMap* clusters) { for (Operation& op : *block) { if (auto attr = op.getAttrOfType(kXlaOutsideCompilationAttr)) { if (attr.getValue().empty()) @@ -67,7 +68,7 @@ LogicalResult CollectAndGroupClusterOps(Block* block, ClusterMap* clusters) { } // Moves `cluster_ops` to associated `launch_op` body. -void MoveClusterOpsToLaunchOp( +void MoveOutsideClusterOpsToLaunchOp( tf_device::LaunchOp launch_op, const llvm::SmallVector& cluster_ops) { MLIRContext* context = launch_op.getContext(); @@ -84,8 +85,8 @@ void MoveClusterOpsToLaunchOp( } // Creates a `tf_device::LaunchOp` to wrap cluster ops. -tf_device::LaunchOp CreateLaunchOpForCluster(OpBuilder* builder, - Operation* last_cluster_op) { +tf_device::LaunchOp CreateLaunchOpForOutsideCluster( + OpBuilder* builder, Operation* last_cluster_op) { // TODO(b/154363171): Set the CPU device. // An empty string placeholder is used for the device as that will be later // populated with the device of the associated TPUReplicateMetadata op. @@ -117,14 +118,14 @@ void PropagateParallelExecuteReturnToReplicate( // Creates a `parallel_execute` op in place of launch with 'clusters` and // 'launch` as regions. 
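As a point of reference for how the tf-tpu-extract-head-tail-outside-compilation pass added above could be exercised, the following is a minimal sketch, not taken from this patch, that runs it over a module through an MLIR PassManager. It assumes the factory CreateTPUExtractHeadTailOutsideCompilationPass is declared in the already-included transforms/passes.h header and that the pass operates on ModuleOp, as the registration suggests.

#include "mlir/IR/Module.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/LogicalResult.h"
#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"

// Sketch only: run the head/tail outside compilation extraction pass on a
// module so that outside compiled ops at the head of each tf_device.cluster
// end up in a preceding tf_device.launch.
mlir::LogicalResult RunHeadTailExtraction(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::TFTPU::CreateTPUExtractHeadTailOutsideCompilationPass());
  return pm.run(module);
}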
-void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, - const ClusterMap& clusters) { - OpBuilder builder(launch); +void CreateParallelExecuteFromOutsideClusters( + tf_device::ClusterOp tpu_cluster, const OutsideClusterMap& clusters) { + OpBuilder builder(tpu_cluster); // Create parallel_execute regions. The original TPU cluster computation // is the extra region. int num_regions = 1 + clusters.size(); auto parallel_execute_op = builder.create( - launch.getLoc(), num_regions, launch.results().getTypes()); + tpu_cluster.getLoc(), num_regions, tpu_cluster.results().getTypes()); // Move outside compilation clusters to parallel_execute regions. for (const auto& cluster : llvm::enumerate(clusters)) { @@ -134,21 +135,23 @@ void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, parallel_execute_op.GetRegionBlockWithIndex(cluster.index()); builder.setInsertionPointToEnd(&outside_block); tf_device::LaunchOp launch_op = - CreateLaunchOpForCluster(&builder, cluster_ops.back()); - MoveClusterOpsToLaunchOp(launch_op, cluster_ops); + CreateLaunchOpForOutsideCluster(&builder, cluster_ops.back()); + MoveOutsideClusterOpsToLaunchOp(launch_op, cluster_ops); builder.setInsertionPointToEnd(&outside_block); // TODO(b/154363171): Handle returns from OutsideCompiled parallel_execute // regions either through communication with TPU parallel_execute regions // or modifying parallel_execute returns. - builder.create(launch.getLoc(), ArrayRef{}); + builder.create(tpu_cluster.getLoc(), + ArrayRef{}); } // Move the launch body to last parallel_execute block. Block& inside_block = parallel_execute_op.GetRegionBlockWithIndex(num_regions - 1); builder.setInsertionPointToEnd(&inside_block); - builder.create(launch.getLoc(), launch.getResults()); - launch.getOperation()->moveBefore(inside_block.getTerminator()); + builder.create(tpu_cluster.getLoc(), + tpu_cluster.getResults()); + tpu_cluster.getOperation()->moveBefore(inside_block.getTerminator()); PropagateParallelExecuteReturnToReplicate(parallel_execute_op); // TODO(b/154363171): Handle returns from OutsideCompiled parallel_execute @@ -157,17 +160,19 @@ void CreateParallelExecuteFromClusters(tf_device::LaunchOp launch, } void TPUExtractOutsideCompilation::runOnFunction() { - auto extract_result = getFunction().walk([&](tf_device::LaunchOp launch) { - ClusterMap clusters; - if (failed(CollectAndGroupClusterOps(&launch.GetBody(), &clusters))) - return WalkResult::interrupt(); + auto extract_result = + getFunction().walk([&](tf_device::ClusterOp tpu_cluster) { + OutsideClusterMap clusters; + if (failed(CollectAndGroupOutsideClusterOps(&tpu_cluster.GetBody(), + &clusters))) + return WalkResult::interrupt(); - if (clusters.empty()) return WalkResult::advance(); + if (clusters.empty()) return WalkResult::advance(); - CreateParallelExecuteFromClusters(launch, clusters); + CreateParallelExecuteFromOutsideClusters(tpu_cluster, clusters); - return WalkResult::advance(); - }); + return WalkResult::advance(); + }); if (extract_result.wasInterrupted()) return signalPassFailure(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index a635fdb9a1f..f5e9da915c8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -82,17 +82,17 @@ constexpr char kBadArrayElementMsg[] = constexpr char kBadArrayAttrLengthMsg[] = "bad '{0}' attribute, expected array attribute of 
size {1}, got size {2}"; -// Rewrites `tf_device.launch_func` operations assigned to TPU into actual TPU +// Rewrites `tf_device.cluster_func` operations assigned to TPU into actual TPU // jit-compile runtime ops. // // For example: -// %1 = "tf_device.launch_func"(%0) {_tpu_replicate = "cluster", func = +// %1 = "tf_device.cluster_func"(%0) {_tpu_replicate = "cluster", func = // @tpu_func} // %2 = "tf.SomeOp"(%1) // // Would become following ops (unimportant attributes, types are omitted): // %1 = "tf.Shape"(%0) -// %2:2 = "tf.MLIRCompileToTPU"(%1) {module = ""} +// %2:2 = "tf._TPUCompileMlir"(%1) {module = ""} // "tf.TPUCompileSucceededAssert"(%2#0) // %3 = "tf.TPUExecute"(%0, %2#1) // %4 = "tf.SomeOp"(%3) @@ -165,7 +165,7 @@ LogicalResult EncapsulateFuncAndSerialize(FuncOp entry_func, // Extracts device coordinates from a device assignment attribute on an op. LogicalResult GetDeviceCoordinates( - tf_device::LaunchFuncOp op, + tf_device::ClusterFuncOp op, llvm::SmallVectorImpl* device_assignment) { auto device_assignment_attr = op.getAttrOfType(kDeviceAssignmentAttr); @@ -190,9 +190,9 @@ LogicalResult GetDeviceCoordinates( } // Populates a TPUCompileMetadataProto with StepMarkerLocation from a -// `tf_device::LaunchFuncOp`. +// `tf_device::ClusterFuncOp`. LogicalResult SetMetadataProtoStepMarkerLocation( - tf_device::LaunchFuncOp op, + tf_device::ClusterFuncOp op, tensorflow::tpu::TPUCompileMetadataProto* metadata) { auto step_marker_location = op.getAttrOfType(kStepMarkerLocationAttr); @@ -216,9 +216,9 @@ LogicalResult SetMetadataProtoStepMarkerLocation( } // Populates a TPUCompileMetadataProto with PaddingMap from a -// `tf_device::LaunchFuncOp`. +// `tf_device::ClusterFuncOp`. LogicalResult SetMetadataProtoPaddingMap( - tf_device::LaunchFuncOp op, + tf_device::ClusterFuncOp op, tensorflow::tpu::TPUCompileMetadataProto* metadata) { auto padding_map = op.getAttrOfType(kPaddingMapAttr); if (!padding_map) @@ -259,9 +259,9 @@ LogicalResult SetOpSharding(Operation* op, Attribute attr, llvm::StringRef name, } // Populates a TPUCompileMetadataProto with argument types and sharding from a -// `tf_device::LaunchFuncOp`. +// `tf_device::ClusterFuncOp`. LogicalResult SetMetadataProtoArgs( - tf_device::LaunchFuncOp op, + tf_device::ClusterFuncOp op, tensorflow::tpu::TPUCompileMetadataProto* metadata) { auto input_shardings = op.getAttrOfType(tensorflow::kInputShardingAttr); @@ -314,9 +314,9 @@ LogicalResult SetMetadataProtoArgs( } // Populates a TPUCompileMetadataProto with result sharding from a -// `tf_device::LaunchFuncOp`. +// `tf_device::ClusterFuncOp`. LogicalResult SetMetadataProtoRetvals( - tf_device::LaunchFuncOp op, + tf_device::ClusterFuncOp op, tensorflow::tpu::TPUCompileMetadataProto* metadata) { auto output_shardings = op.getAttrOfType(tensorflow::kOutputShardingAttr); @@ -341,11 +341,11 @@ LogicalResult SetMetadataProtoRetvals( } // Populates a TPUCompileMetadataProto from attributes of a -// `tf_device::LaunchFuncOp`. If any necessary attributes are missing from the +// `tf_device::ClusterFuncOp`. If any necessary attributes are missing from the // op, a failure will be returned. // TODO(lyandy): Support session handle and guaranteed consts. 
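For orientation before the renamed helper below: the proto being populated is the ordinary TPUCompileMetadataProto from tensorflow/core/protobuf/tpu/compile_metadata.proto. The sketch that follows is illustrative only and is not part of the patch; it shows just the replica/core counters for the simplest case (one replica, one core per replica), assumes the generated header path follows the proto location, and omits the argument, return-value and device-assignment fields that the other helpers above fill in.

#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h"

// Illustrative sketch: the replica/core fields set for a non-replicated,
// single-core cluster. All other metadata fields are left at their defaults.
tensorflow::tpu::TPUCompileMetadataProto MakeMinimalTpuCompileMetadata() {
  tensorflow::tpu::TPUCompileMetadataProto metadata;
  metadata.set_num_replicas(1);
  metadata.set_num_cores_per_replica(1);
  return metadata;
}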
-LogicalResult SetMetadataProtoFromLaunchFuncOp( - tf_device::LaunchFuncOp op, int num_replicas, int num_cores_per_replica, +LogicalResult SetMetadataProtoFromClusterFuncOp( + tf_device::ClusterFuncOp op, int num_replicas, int num_cores_per_replica, llvm::Optional&& xla_device_assignment, tensorflow::tpu::TPUCompileMetadataProto* metadata) { metadata->set_num_replicas(num_replicas); @@ -377,7 +377,7 @@ tf_device::LaunchOp WrapOpInLaunch(OpBuilder* builder, Location loc, builder->setInsertionPointToEnd(&launch.GetBody()); builder->create(loc, op->getResults()); - // Move op inside launch. + // Move op inside cluster. op->moveBefore(launch.GetBody().getTerminator()); builder->restoreInsertionPoint(insert_point); @@ -386,16 +386,16 @@ tf_device::LaunchOp WrapOpInLaunch(OpBuilder* builder, Location loc, } // Create a `tf._TPUCompileMlir` that contains a MLIR module that is -// functionally equivalent to the function referenced by launch_func. +// functionally equivalent to the function referenced by cluster_func. Operation* BuildCompileOp( - tf_device::LaunchFuncOp launch_func, int num_replicas, + tf_device::ClusterFuncOp cluster_func, int num_replicas, int num_cores_per_replica, llvm::StringRef compilation_device, llvm::Optional&& xla_device_assignment, OpBuilder* builder) { // Set metadata from attributes. tensorflow::tpu::TPUCompileMetadataProto metadata; - if (failed(SetMetadataProtoFromLaunchFuncOp( - launch_func, num_replicas, num_cores_per_replica, + if (failed(SetMetadataProtoFromClusterFuncOp( + cluster_func, num_replicas, num_cores_per_replica, std::move(xla_device_assignment), &metadata))) return nullptr; @@ -405,28 +405,28 @@ Operation* BuildCompileOp( else metadata.SerializeToString(&txt_metadata); - // Build a shape op for each input to launch_func. + // Build a shape op for each input to cluster_func. // TODO(b/139377366): When shape inference is ready, we can use compile time // shape inference to get inputs that have static shapes and only use shape // ops for the rest. llvm::SmallVector compile_op_operands; - compile_op_operands.reserve(launch_func.getNumOperands()); + compile_op_operands.reserve(cluster_func.getNumOperands()); - for (auto operand_and_idx : llvm::enumerate(launch_func.getOperands())) { + for (auto operand_and_idx : llvm::enumerate(cluster_func.getOperands())) { // Skip adding shape op for operands that have static shapes. tensorflow::PartialTensorShape shape( metadata.args(operand_and_idx.index()).shape()); if (shape.IsFullyDefined()) continue; auto shape_op = builder->create( - launch_func.getLoc(), + cluster_func.getLoc(), RankedTensorType::get({-1}, builder->getIntegerType(64)), operand_and_idx.value()); compile_op_operands.emplace_back(shape_op.getResult()); } - FlatSymbolRefAttr func_attr = launch_func.funcAttr(); - FuncOp func = launch_func.getParentOfType().lookupSymbol( + FlatSymbolRefAttr func_attr = cluster_func.funcAttr(); + FuncOp func = cluster_func.getParentOfType().lookupSymbol( func_attr.getValue()); std::string txt_module; @@ -436,7 +436,7 @@ Operation* BuildCompileOp( RankedTensorType::get({}, builder->getType()); auto compile_op = builder->create( - launch_func.getLoc(), /*compilation_status=*/result_type, /*program=*/ + cluster_func.getLoc(), /*compilation_status=*/result_type, /*program=*/ llvm::SmallVector(num_cores_per_replica, result_type), compile_op_operands, txt_module, txt_metadata); @@ -448,19 +448,20 @@ Operation* BuildCompileOp( // core, and all replica devices per core are grouped together. 
void AssignDevicesToReplicate( tf_device::ReplicateOp replicate, - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, OpBuilder* builder) { if (!replicate) return; - const int num_replicas = execution_devices.size(); - const int num_cores_per_replica = execution_devices.front().size(); + const int num_replicas = tpu_devices.size(); + const int num_cores_per_replica = tpu_devices.front().size(); llvm::SmallVector device_attrs; for (int core = 0; core < num_cores_per_replica; ++core) { llvm::SmallVector devices_by_core; devices_by_core.reserve(num_replicas); for (int replica = 0; replica < num_replicas; ++replica) - devices_by_core.push_back(execution_devices[replica][core]); + devices_by_core.push_back(tpu_devices[replica][core].device); device_attrs.push_back( builder->getNamedAttr(tensorflow::GetDeviceAliasForLogicalCore(core), @@ -473,18 +474,18 @@ void AssignDevicesToReplicate( // Creates a `tf.TPUExecute` op that executes TPU program. LogicalResult BuildExecuteOp( const int core_id, llvm::ArrayRef output_sharding_config, - llvm::ArrayRef inputs, tf_device::LaunchFuncOp launch_func, + llvm::ArrayRef inputs, tf_device::ClusterFuncOp cluster_func, OpBuilder* builder, TF::TPUExecuteOp* execute_op) { // TODO(b/139377366): Need to snapshot all resource variable inputs in // follow-up CLs. llvm::SmallVector output_types; auto result = tensorflow::GetOutputTypesForLogicalDeviceComputation( - core_id, output_sharding_config, launch_func, &output_types); + core_id, output_sharding_config, cluster_func, &output_types); if (failed(result)) return failure(); - // TPUExecute has same output types as launch_func. + // TPUExecute has same output types as cluster_func. *execute_op = builder->create( - launch_func.getLoc(), output_types, inputs, + cluster_func.getLoc(), output_types, inputs, llvm::ArrayRef{}); return success(); } @@ -492,32 +493,33 @@ LogicalResult BuildExecuteOp( // Creates a tf_device.parallel_execute op that wraps TPUExecute op to // represent execution of TPU program in multiple logical cores. LogicalResult BuildParallelExecuteOp( - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, llvm::ArrayRef output_sharding_config, - Operation* compile_op, tf_device::LaunchFuncOp launch_func, + Operation* compile_op, tf_device::ClusterFuncOp cluster_func, OpBuilder* builder, tf_device::ParallelExecuteOp* parallel_execute_op) { - const int num_cores_per_replica = execution_devices.front().size(); + const int num_cores_per_replica = tpu_devices.front().size(); // parallel_execute op returns concatenated list of return values of // all its regions. // // TODO(b/149102702): Correctly map inputs to parallel_execute op via - // identifying xla_sharding op in the launch_func function. - const auto& launch_result_types = launch_func.getResultTypes(); + // identifying xla_sharding op in the cluster_func function. 
+ const auto cluster_result_types = cluster_func.getResultTypes(); llvm::SmallVector concatenated_output_types; - concatenated_output_types.reserve(launch_result_types.size() * + concatenated_output_types.reserve(cluster_result_types.size() * num_cores_per_replica); for (int core = 0; core < num_cores_per_replica; ++core) { llvm::SmallVector output_types; auto result = tensorflow::GetOutputTypesForLogicalDeviceComputation( - core, output_sharding_config, launch_func, &output_types); + core, output_sharding_config, cluster_func, &output_types); if (failed(result)) return failure(); for (Type t : output_types) concatenated_output_types.emplace_back(t); } *parallel_execute_op = builder->create( - launch_func.getLoc(), num_cores_per_replica, concatenated_output_types); + cluster_func.getLoc(), num_cores_per_replica, concatenated_output_types); // Extract inputs for each region of the parallel_execute op. The i-th // element in the list represents the input lists to TPU computation for @@ -525,10 +527,10 @@ LogicalResult BuildParallelExecuteOp( llvm::SmallVector, 4> input_list; builder->setInsertionPoint(*parallel_execute_op); auto result = tensorflow::ExtractInputsForLogicalDevices( - num_cores_per_replica, launch_func, builder, &input_list); + num_cores_per_replica, cluster_func, builder, &input_list); if (failed(result)) return failure(); - const bool replicated = execution_devices.size() != 1; + const bool replicated = tpu_devices.size() != 1; // For each logical core, create a region with TPUExecute op. assert(input_list.size() == num_cores_per_replica); for (int core = 0; core < num_cores_per_replica; ++core) { @@ -539,13 +541,13 @@ LogicalResult BuildParallelExecuteOp( // // TODO(b/148913294): Identify inputs/return values specific to each // logical core TPU execution by parsing xla_sharding op in - // launch_func. + // cluster_func. auto execute_inputs = input_list[core]; execute_inputs.emplace_back(compile_op->getResult(core + 1)); TF::TPUExecuteOp execute; result = BuildExecuteOp(core, output_sharding_config, execute_inputs, - launch_func, builder, &execute); + cluster_func, builder, &execute); if (failed(result)) return failure(); // If computation is replicated, use aliased device. Otherwise there is only @@ -553,7 +555,7 @@ LogicalResult BuildParallelExecuteOp( // op. std::string device = replicated ? tensorflow::GetDeviceAliasForLogicalCore(core) - : execution_devices.front()[core]; + : tpu_devices.front()[core].device; auto region_launch_op = WrapOpInLaunch(builder, region.getParent()->getLoc(), execute, device); @@ -566,13 +568,14 @@ LogicalResult BuildParallelExecuteOp( } tf_device::LaunchOp AssignDevicesToReplicatedExecute( - llvm::ArrayRef> execution_devices, + llvm::ArrayRef> + tpu_devices, Operation* execute_op, OpBuilder* builder) { - const bool replicated = execution_devices.size() != 1; + const bool replicated = tpu_devices.size() != 1; // If computation is replicated, use aliased device. Otherwise there is only // one execution device and the device is assigned to the execute op. std::string device = replicated ? 
tensorflow::GetDeviceAliasForLogicalCore(0) - : execution_devices.front().front(); + : tpu_devices.front().front().device; return WrapOpInLaunch(builder, execute_op->getLoc(), execute_op, device); } @@ -587,16 +590,16 @@ void BuildTPUCompileSucceededAssertOp(Operation* compile_op, WrapOpInLaunch(builder, compile_op->getLoc(), assert_op, compilation_device); } -// Rewrites a `tf_device.launch_func` operation into a set of TPU Runtime -// Operations that jit-compiles and executes function in `tf_device.launch_func` -// on TPU. Device assignment is determined from available devices in `devices`. -// If it is not possible to rewrite the operation or device assignment fails, a -// failure will be returned. +// Rewrites a `tf_device.cluster_func` operation into a set of TPU Runtime +// Operations that jit-compiles and executes function in +// `tf_device.cluster_func` on TPU. Device assignment is determined from +// available devices in `devices`. If it is not possible to rewrite the +// operation or device assignment fails, a failure will be returned. // -// For example, a non replicated `tf_device.launch_func`: +// For example, a non replicated `tf_device.cluster_func`: // // func @main(%arg0: tensor) { -// %0 = "tf_device.launch_func"(%arg0) +// %0 = "tf_device.cluster_func"(%arg0) // {_tpu_replicate = "cluster0", device = "", func = @_func} : // (tensor) -> tensor // return @@ -613,12 +616,12 @@ void BuildTPUCompileSucceededAssertOp(Operation* compile_op, // return // } // -// and a replicated `tf_device.launch_func`: +// and a replicated `tf_device.cluster_func`: // // func @main(%arg0: tensor, %arg1: tensor) { // %0:2 = tf_device.replicate([%arg0, %arg1] as %ri: tensor) // {n = 2 : i32} { -// %1 = "tf_device.launch_func"(%ri) +// %1 = "tf_device.cluster_func"(%ri) // {_tpu_replicate = "cluster0", device = "", func = @_func} : // (tensor) -> tensor // tf_device.return %1 : tensor @@ -641,36 +644,37 @@ void BuildTPUCompileSucceededAssertOp(Operation* compile_op, // return // } LogicalResult Rewrite( - tf_device::LaunchFuncOp launch_func, + tf_device::ClusterFuncOp cluster_func, llvm::ArrayRef devices, OpBuilder* builder) { - // Skip non-tpu device launch_func. - auto replicate_attr = launch_func.getAttrOfType("_tpu_replicate"); + // Skip non-tpu device cluster_func. + auto replicate_attr = + cluster_func.getAttrOfType("_tpu_replicate"); if (!replicate_attr) return success(); // Collect `num_replicas` and `num_cores_per_replica` attributes. int num_replicas = 1; tf_device::ReplicateOp replicate = - launch_func.getParentOp() + cluster_func.getParentOp() ? 
llvm::dyn_cast_or_null( - launch_func.getParentOp()) + cluster_func.getParentOp()) : nullptr; if (replicate) num_replicas = replicate.n().getLimitedValue(); auto num_cores_per_replica_attr = - launch_func.getAttrOfType(kNumCoresPerReplicaAttr); + cluster_func.getAttrOfType(kNumCoresPerReplicaAttr); if (!num_cores_per_replica_attr) - return launch_func.emitOpError( + return cluster_func.emitOpError( CreateMissingAttributeMsg(kNumCoresPerReplicaAttr)); int num_cores_per_replica = num_cores_per_replica_attr.getInt(); - auto topology_attr = launch_func.getAttrOfType(kTopologyAttr); + auto topology_attr = cluster_func.getAttrOfType(kTopologyAttr); if (!topology_attr) - return launch_func.emitOpError(CreateMissingAttributeMsg(kTopologyAttr)); + return cluster_func.emitOpError(CreateMissingAttributeMsg(kTopologyAttr)); llvm::SmallVector device_assignment; - if (failed(GetDeviceCoordinates(launch_func, &device_assignment))) + if (failed(GetDeviceCoordinates(cluster_func, &device_assignment))) return failure(); // Determine compilation and execution devices. @@ -679,15 +683,25 @@ LogicalResult Rewrite( devices, num_replicas, num_cores_per_replica, topology_attr.getValue(), device_assignment); if (!status_or_tpu_device_assignment.ok()) - return launch_func.emitError() + return cluster_func.emitError() << "error in fetching TPU compilation/execution devices: " << status_or_tpu_device_assignment.status().error_message(); // Create compile op. auto& tpu_device_assignment = status_or_tpu_device_assignment.ValueOrDie(); - builder->setInsertionPoint(launch_func); + builder->setInsertionPoint(cluster_func); + + // Create the TPUCompileMlir and TPUCompileSucceededAssert outside of + // parallel_execute region if it exists. + if (llvm::isa(cluster_func.getParentOp())) { + // Currently, outside compilation and model parallelism are not supported + // together. + assert(num_cores_per_replica == 1); + builder->setInsertionPoint(cluster_func.getParentOp()); + } + Operation* compile_op = BuildCompileOp( - launch_func, num_replicas, num_cores_per_replica, + cluster_func, num_replicas, num_cores_per_replica, tpu_device_assignment.compilation_device, std::move(tpu_device_assignment.xla_device_assignment), builder); if (!compile_op) return failure(); @@ -696,54 +710,55 @@ LogicalResult Rewrite( // the same _tpu_replicate attribute and replace it with the result of the // compile op. This op is used as a placeholder to hook during graph creation // the other ops that are intended to consume the compile result. - Block* block = launch_func.getOperation()->getBlock(); + Block* block = cluster_func.getOperation()->getBlock(); for (auto compile_result_op : block->getOps()) compile_result_op.output().replaceAllUsesWith(compile_op->getResult(0)); BuildTPUCompileSucceededAssertOp( compile_op, tpu_device_assignment.compilation_device, builder); - AssignDevicesToReplicate(replicate, tpu_device_assignment.execution_devices, + AssignDevicesToReplicate(replicate, tpu_device_assignment.tpu_devices, builder); llvm::SmallVector output_shardings; auto result = tensorflow::ParseAndValidateOutputSharding( - num_cores_per_replica, launch_func, &output_shardings); + num_cores_per_replica, cluster_func, &output_shardings); if (failed(result)) return failure(); + builder->setInsertionPoint(cluster_func); if (num_cores_per_replica > 1) { // For model parallelism, tf_device.parallel_execute is used to express // concurrent device execution across multiple logical devices. 
tf_device::ParallelExecuteOp execute_op; - result = BuildParallelExecuteOp(tpu_device_assignment.execution_devices, - output_shardings, compile_op, launch_func, + result = BuildParallelExecuteOp(tpu_device_assignment.tpu_devices, + output_shardings, compile_op, cluster_func, builder, &execute_op); if (failed(result)) return failure(); // As tf_device.parallel_execute wraps # logical cores number of TPUExecute // ops, the number of return values of parallel_execute op exceeds that of - // launch_func op. As so, each return value of parallel_execute op must be - // mapped with corresponding return value usages of launch_func. - tensorflow::RemapOutputsFromLogicalDevices(launch_func.getLoc(), - output_shardings, launch_func, + // cluster_func op. As so, each return value of parallel_execute op must be + // mapped with corresponding return value usages of cluster_func. + tensorflow::RemapOutputsFromLogicalDevices(cluster_func.getLoc(), + output_shardings, cluster_func, execute_op, builder); } else { - llvm::SmallVector execute_inputs(launch_func.getOperands()); + llvm::SmallVector execute_inputs(cluster_func.getOperands()); execute_inputs.emplace_back(compile_op->getResult(1)); TF::TPUExecuteOp execute_op; result = BuildExecuteOp( - /*core_id=*/0, output_shardings, execute_inputs, launch_func, builder, + /*core_id=*/0, output_shardings, execute_inputs, cluster_func, builder, &execute_op); if (failed(result)) return failure(); tf_device::LaunchOp launch_op = AssignDevicesToReplicatedExecute( - tpu_device_assignment.execution_devices, execute_op, builder); - launch_func.replaceAllUsesWith(launch_op); + tpu_device_assignment.tpu_devices, execute_op, builder); + cluster_func.replaceAllUsesWith(launch_op); } - launch_func.erase(); + cluster_func.erase(); return success(); } @@ -754,7 +769,7 @@ void TPURewritePass::runOnOperation() { return signalPassFailure(); OpBuilder builder(&getContext()); - auto result = getOperation().walk([&](tf_device::LaunchFuncOp op) { + auto result = getOperation().walk([&](tf_device::ClusterFuncOp op) { if (failed(Rewrite(op, devices.device_names(), &builder))) return WalkResult::interrupt(); @@ -777,7 +792,7 @@ std::unique_ptr> CreateTPURewritePass() { static PassRegistration pass( "tf-tpu-rewrite", - "Rewriting `tf_device.launch_func` on TPUs into TPU runtime ops"); + "Rewriting `tf_device.cluster_func` on TPUs into TPU runtime ops"); } // namespace TFTPU } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_sharding_identification_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_sharding_identification_pass.cc index ce627737646..f8b6e364f55 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_sharding_identification_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_sharding_identification_pass.cc @@ -47,19 +47,19 @@ struct TPUShardingIdentificationPass void runOnOperation() override; }; -// Sets `sharding_op` if `op` is XlaShardingOp or if XlaSharding op is -// adjacent to `op`. XlaSharding op may be direct user of inputs but it -// may also be followed by an Identity op and, in the case where bfloat16 -// type is used, Cast op may be added right after the input. As so, -// parse the users of the operation to access connected XlaSharding op. +// Sets `sharding_op` if `op` is XlaShardingOp or if XlaSharding op is adjacent +// to `op`. 
XlaSharding op may be direct user of inputs but it may also be +// followed by an Identity op and, in the case where bfloat16 type is used, Cast +// op may be added right after the input. As so, parse the users of the +// operation to access connected XlaSharding op. // -// TODO(hongjunchoi): Consider explicitly checking op patterns to detect -// sharded inputs. +// TODO(hongjunchoi): Consider explicitly checking op patterns to detect sharded +// inputs. void GetAdjacentXlaShardingOp(Operation* op, llvm::Optional* sharding_op) { - // TODO(hongjunchoi): Detect the case when sharding configuration is - // ambiguous for a single input (i.e. multiple different XlaSharding ops - // with different configuration policies are connected). + // TODO(hongjunchoi): Detect the case when sharding configuration is ambiguous + // for a single input (i.e. multiple different XlaSharding ops with different + // configuration policies are connected). if (sharding_op->hasValue()) return; if (auto sharding = llvm::dyn_cast(op)) { @@ -74,11 +74,11 @@ void GetAdjacentXlaShardingOp(Operation* op, } // Parses XlaSharding op connected to input args. If Input to -// tf_device.LaunchFunc op is of resource type, then XlaSharding op -// will be connected to following ReadVariable op. +// tf_device.ClusterFunc op is of resource type, then XlaSharding op will be +// connected to following ReadVariable op. // -// TODO(hongjunchoi): Add logic to parse XlaSharding op inside a -// Call op or if/while op. +// TODO(hongjunchoi): Add logic to parse XlaSharding op inside a Call op or +// If/While op. llvm::Optional ParseInputSharding(const Value& arg) { llvm::Optional parsed_sharding_op; for (auto user : arg.getUsers()) { @@ -96,8 +96,8 @@ llvm::Optional ParseInputSharding(const Value& arg) { return parsed_sharding_op.getValue()._XlaSharding(); } -// Returns the provided sharding configuration if operand of return value -// of tf_device.LaunchFunc op is directly from XlaSharding op, +// Returns the provided sharding configuration if operand of return value of +// tf_device.ClusterFunc op is directly from XlaSharding op, llvm::Optional ParseReturnValueSharding(FuncOp func, const int output_index, const OpOperand& operand) { @@ -108,16 +108,16 @@ llvm::Optional ParseReturnValueSharding(FuncOp func, return llvm::Optional(); } -// Includes information on Func op and argument index of the input value. -// This is used to trace Value that is fed into function call ops. +// Includes information on Func op and argument index of the input value. This +// is used to trace Value that is fed into function call ops. struct FunctionAndArgumentInfo { FuncOp func; int argument_index; }; -// Adds tf.PartitionedCall op or tf.StatefulPartitionedCall op to `list`. -// If `op` is a function call op, then find the func op from provided `module` -// and add the func op with `arg_index` to `list`. `list` will later be used to +// Adds tf.PartitionedCall op or tf.StatefulPartitionedCall op to `list`. If +// `op` is a function call op, then find the func op from provided `module` and +// add the func op with `arg_index` to `list`. `list` will later be used to // trace mlir::Value that is fed into (potentially nested) function call ops. void AddFunctionalOpsToList( const int arg_index, ModuleOp module, Operation* op, @@ -138,8 +138,8 @@ void AddFunctionalOpsToList( } } -// Walks the MLIR graph from `arg` and return a list of all function -// call ops to which the `arg` op is directly connected. 
+// Walks the MLIR graph from `arg` and return a list of all function call ops to +// which the `arg` op is directly connected. // // For example: // argument0 -> PartitionedCallOp -> StatefulPartitionedCallOp -> AddOp @@ -177,31 +177,33 @@ llvm::SmallVector ExtractFunctionsConnectedToArg( return functions_connected_to_arg; } -// Walks the graph from the arguments of the `launch_func_op` and extracts -// sharding configurations for all inputs by parsing XlaSharding op connected -// to the arguments. If argument to the `launch_func_op` directly feeds into +// Walks the graph from the arguments of the `cluster_func_op` and extracts +// sharding configurations for all inputs by parsing XlaSharding op connected to +// the arguments. If argument to the `cluster_func_op` directly feeds into // another function call op, then recursively walk the function definition to // find the connected XlaSharding op. void IdentifyXlaShardingForComputationInputs( - StringRef logical_core_0_sharding, tf_device::LaunchFuncOp launch_func_op, - FuncOp launch_function, Builder* builder) { + StringRef logical_core_0_sharding, tf_device::ClusterFuncOp cluster_func_op, + FuncOp cluster_function, Builder* builder) { // Look up function definition from module. - Block& launch_function_block = launch_function.getBody().getBlocks().front(); - ModuleOp module = launch_func_op.getParentOfType(); + Block& cluster_function_block = + cluster_function.getBody().getBlocks().front(); + ModuleOp module = cluster_func_op.getParentOfType(); llvm::SmallVector sharding_for_args( - launch_function_block.getNumArguments(), logical_core_0_sharding); + cluster_function_block.getNumArguments(), logical_core_0_sharding); - // Iterate through input arguments to the entry block of tf_device.LaunchFunc. - // For input ops, look for following XlaSharding ops. XlaSharding ops can: + // Iterate through input arguments to the entry block of + // tf_device.ClusterFunc. For input ops, look for following XlaSharding ops. + // XlaSharding ops can: // 1) Directly follow the input argument if input argument has non-resource // types. // 2) Follow ReadVariableOp if the input type is of resource type. // 3) Follow IdentityOp or CastOp after above cases (1), (2). // - // Sharding configurations are added to the tf_device.LaunchFunc as an + // Sharding configurations are added to the tf_device.ClusterFunc as an // attribute and the function as an argument attribute. - for (auto& arg : launch_function_block.getArguments()) { + for (auto& arg : cluster_function_block.getArguments()) { auto arg_sharding = ParseInputSharding(arg); const int arg_index_to_tpu_computation = arg.getArgNumber(); @@ -222,25 +224,25 @@ void IdentifyXlaShardingForComputationInputs( if (arg_sharding) { sharding_for_args[arg_index_to_tpu_computation] = arg_sharding.getValue(); - launch_function.setArgAttr( + cluster_function.setArgAttr( arg_index_to_tpu_computation, kShardingAttr, builder->getStringAttr(arg_sharding.getValue())); } else { - launch_function.setArgAttr( + cluster_function.setArgAttr( arg_index_to_tpu_computation, kShardingAttr, builder->getStringAttr(logical_core_0_sharding)); } } - launch_func_op.setAttr(tensorflow::kInputShardingAttr, - builder->getStrArrayAttr(sharding_for_args)); + cluster_func_op.setAttr(tensorflow::kInputShardingAttr, + builder->getStrArrayAttr(sharding_for_args)); } // Parses XlaSharding op directly connected from the outputs of the -// `launch_func` and extract sharding configurations for outputs. 
+// `cluster_func` and extract sharding configurations for outputs. void IdentifyXlaShardingForComputationOutputs( StringRef logical_core_0_sharding, FuncOp func, - tf_device::LaunchFuncOp launch_func, Builder* builder) { + tf_device::ClusterFuncOp cluster_func, Builder* builder) { // By default return values from logical core 0 is used if no sharding // configuration is defined. Block& function_block = func.getBody().getBlocks().front(); @@ -250,7 +252,7 @@ void IdentifyXlaShardingForComputationOutputs( // Iterate through operands of the terminator. If the preceding op is // XlaShardingOp, then the provided sharding configuration is added to the - // tf_device.LaunchFunc as an attribute and the function as a result + // tf_device.ClusterFunc as an attribute and the function as a result // attribute. for (auto& ret : terminator->getOpOperands()) { const int index = ret.getOperandNumber(); @@ -265,35 +267,35 @@ void IdentifyXlaShardingForComputationOutputs( builder->getStringAttr(logical_core_0_sharding)); } } - launch_func.setAttr(tensorflow::kOutputShardingAttr, - builder->getStrArrayAttr(sharding_for_rets)); + cluster_func.setAttr(tensorflow::kOutputShardingAttr, + builder->getStrArrayAttr(sharding_for_rets)); } -// Extracts input/output sharding configuration of `launch_func` by parsing -// XlaSharding ops inside the `launch_func`. -void IdentifyXlaShardingForTPUComputation(Builder* builder, - tf_device::LaunchFuncOp launch_func) { +// Extracts input/output sharding configuration of `cluster_func` by parsing +// XlaSharding ops inside the `cluster_func`. +void IdentifyXlaShardingForTPUComputation( + Builder* builder, tf_device::ClusterFuncOp cluster_func) { // Look up function definition from module. - FuncOp func = launch_func.getParentOfType().lookupSymbol( - launch_func.func()); + FuncOp func = cluster_func.getParentOfType().lookupSymbol( + cluster_func.func()); - // By default inputs/outputs have maximal sharding and are assigned to - // logical core 0 if no sharding is defined. + // By default inputs/outputs have maximal sharding and are assigned to logical + // core 0 if no sharding is defined. const std::string logical_core_0_sharding = xla::sharding_builder::AssignDevice(0).SerializeAsString(); - IdentifyXlaShardingForComputationInputs(logical_core_0_sharding, launch_func, + IdentifyXlaShardingForComputationInputs(logical_core_0_sharding, cluster_func, func, builder); IdentifyXlaShardingForComputationOutputs(logical_core_0_sharding, func, - launch_func, builder); + cluster_func, builder); } void TPUShardingIdentificationPass::runOnOperation() { Builder builder(getOperation().getContext()); - getOperation().walk([&](tf_device::LaunchFuncOp launch_func) { - IdentifyXlaShardingForTPUComputation(&builder, launch_func); + getOperation().walk([&](tf_device::ClusterFuncOp cluster_func) { + IdentifyXlaShardingForTPUComputation(&builder, cluster_func); }); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index af8b4f064dd..a613ce1f920 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -40,10 +40,10 @@ limitations under the License. 
#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Analysis/Verifier.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project @@ -57,6 +57,8 @@ limitations under the License. #include "mlir/IR/OperationSupport.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Verifier.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project #include "tensorflow/compiler/jit/shape_inference_helpers.h" #include "tensorflow/compiler/mlir/op_or_arg_name_mapper.h" #include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" @@ -65,6 +67,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" @@ -109,6 +112,7 @@ static inline absl::string_view StringRefToView(llvm::StringRef ref) { } namespace tensorflow { +using mlir::NamedAttrList; using mlir::TensorType; using mlir::TF::VarHandleOp; using mlir::tf_saved_model::GlobalTensorOp; @@ -306,9 +310,9 @@ class ImporterBase { // AttrValue {name : foo, attrs : {k1 : bar, k2 : rfc}}, it will convert it to // a list of MLIR Attributes: [{base_name : foo}, {base_name.k1 : bar}, // {base_name.k2 : rfc}}. - Status ConvertFunctionCallAttribute( - const std::string& base_name, const AttrValue& value, - llvm::SmallVector* attributes); + Status ConvertFunctionCallAttribute(const std::string& base_name, + const AttrValue& value, + NamedAttrList* attributes); // Helper to create either a tf_executor operation or a TF operation wrapped // in an island. When convert_to_legacy_call is true, converts the operation @@ -1089,9 +1093,9 @@ StatusOr ImporterBase::ConvertSubtypes( return subtypes; } -Status ImporterBase::ConvertFunctionCallAttribute( - const std::string& base_name, const AttrValue& value, - llvm::SmallVector* attributes) { +Status ImporterBase::ConvertFunctionCallAttribute(const std::string& base_name, + const AttrValue& value, + NamedAttrList* attributes) { TF_ASSIGN_OR_RETURN(auto func_attr, ConvertFunctionCallName(value.func().name())); attributes->push_back(builder_.getNamedAttr(base_name, func_attr)); @@ -1817,6 +1821,8 @@ Status ImporterBase::ConvertNode(const Node& node) { absl::c_stable_sort(in_edges, [](const Edge* e1, const Edge* e2) { if (e1->IsControlEdge() && !e2->IsControlEdge()) return false; if (!e1->IsControlEdge() && e2->IsControlEdge()) return true; + if (e1->IsControlEdge() && e2->IsControlEdge()) + return e1->src()->id() < e2->src()->id(); return e1->dst_input() < e2->dst_input(); }); @@ -2426,8 +2432,8 @@ class SavedModelObjectGraphImporter : public ImporterBase { // Main entry point: converts all functions in the given meta graph to an MLIR // Module. 
static StatusOr Convert( - SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, - absl::Span exported_names, bool add_default_attributes); + SavedModelV2Bundle* saved_model, absl::Span exported_names, + mlir::MLIRContext* context, bool add_default_attributes); private: explicit SavedModelObjectGraphImporter( @@ -3127,8 +3133,8 @@ Status CreateSavedModelIR( } StatusOr SavedModelObjectGraphImporter::Convert( - SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, - absl::Span exported_names, bool add_default_attributes) { + SavedModelV2Bundle* saved_model, absl::Span exported_names, + mlir::MLIRContext* context, bool add_default_attributes) { GraphDebugInfo dummy_debug_info; const GraphDebugInfo& debug_info = saved_model->debug_info() ? *saved_model->debug_info() : dummy_debug_info; @@ -3205,17 +3211,20 @@ class SavedModelSignatureDefImporter { public: // Main entry point: converts all functions (specified by SignatureDefs) in // the given meta graph to an MLIR Module. - static StatusOr Convert(const SavedModelBundle& bundle, - mlir::MLIRContext* context) { - SavedModelSignatureDefImporter importer(bundle, context); + static StatusOr Convert( + const SavedModelBundle& bundle, absl::Span exported_names, + mlir::MLIRContext* context) { + SavedModelSignatureDefImporter importer(bundle, exported_names, context); return importer.ConvertSignatures(); } private: SavedModelSignatureDefImporter(const SavedModelBundle& bundle, + absl::Span exported_names, mlir::MLIRContext* context) : bundle_(bundle), + exported_names_(exported_names), module_(mlir::ModuleOp::create(mlir::UnknownLoc::get(context))) {} // Converts the SavedModel to the SavedModel dialect. Creates an MLIR function @@ -3248,6 +3257,7 @@ class SavedModelSignatureDefImporter { const std::vector>& inputs); const SavedModelBundle& bundle_; + absl::Span exported_names_; mlir::OwningModuleRef module_; }; @@ -3263,6 +3273,9 @@ SavedModelSignatureDefImporter::ConvertSignatures() { GraphDebugInfo debug_info; if (bundle_.debug_info != nullptr) debug_info = *bundle_.debug_info; + llvm::StringSet<> exported_name_set; + exported_name_set.insert(exported_names_.begin(), exported_names_.end()); + for (const auto& key_and_signature_def : signatures) { const std::string& sig_def_key = key_and_signature_def.first; const SignatureDef& signature_def = key_and_signature_def.second; @@ -3272,6 +3285,10 @@ SavedModelSignatureDefImporter::ConvertSignatures() { if (sig_def_key == "__saved_model_init_op") { continue; } + if (!exported_name_set.empty() && + exported_name_set.count(sig_def_key) == 0) { + continue; + } TF_RETURN_IF_ERROR(ConvertSignature(graphdef, sig_def_key, signature_def, debug_info, flib_def)); @@ -3554,12 +3571,14 @@ StatusOr ConvertSavedModelToMlir( SavedModelV2Bundle* saved_model, mlir::MLIRContext* context, absl::Span exported_names, bool add_default_attributes) { return SavedModelObjectGraphImporter::Convert( - saved_model, context, exported_names, add_default_attributes); + saved_model, exported_names, context, add_default_attributes); } StatusOr ConvertSavedModelV1ToMlir( - const SavedModelBundle& saved_model, mlir::MLIRContext* context) { - return SavedModelSignatureDefImporter::Convert(saved_model, context); + const SavedModelBundle& saved_model, absl::Span exported_names, + mlir::MLIRContext* context) { + return SavedModelSignatureDefImporter::Convert(saved_model, exported_names, + context); } std::string MlirModuleToString(mlir::ModuleOp module, bool show_debug_info) { diff --git 
a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h index 8603eadb487..bdb72345201 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.h @@ -55,6 +55,7 @@ stream_executor::port::StatusOr ConvertSavedModelToMlir( // expressed with tf_executor dialect. stream_executor::port::StatusOr ConvertSavedModelV1ToMlir(const SavedModelBundle& saved_model, + absl::Span exported_names, mlir::MLIRContext* context); // Serialize a MLIR module to a string. diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc index f4d3ff443a0..cb3a3be22d8 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.cc @@ -15,9 +15,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_pass.h" -#include "mlir/Analysis/Verifier.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/Verifier.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc index 2c7f84d8268..6ada0fec4e2 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc @@ -141,7 +141,8 @@ mlir::OwningModuleRef SavedModelObjectGraphToMlirImport( mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( absl::string_view saved_model_dir, - const std::unordered_set& tags, mlir::MLIRContext* context) { + const std::unordered_set& tags, + absl::Span exported_names, mlir::MLIRContext* context) { tensorflow::SavedModelBundle bundle; tensorflow::SessionOptions session_options; // Force saved model states to be restored to CPU. @@ -155,7 +156,7 @@ mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( return nullptr; } - auto module_or = ConvertSavedModelV1ToMlir(bundle, context); + auto module_or = ConvertSavedModelV1ToMlir(bundle, exported_names, context); if (!module_or.status().ok()) { LOG(ERROR) << "SavedModel V1 import failed: " << module_or.status(); return nullptr; diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h index f498864c8aa..490b7c7d8f0 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h @@ -64,7 +64,8 @@ mlir::OwningModuleRef SavedModelObjectGraphToMlirImport( // given MLIR `context`. 
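As a usage note for the exported_names parameter threaded through the V1 importer above: an empty span preserves the previous behavior of importing every SignatureDef, while a non-empty one restricts the import to the named signatures. The sketch below is not part of the patch; the directory, tag and signature key are placeholders, and the element types of the span and set (elided in this hunk) are assumed to be std::string.

#include <string>
#include <unordered_set>
#include <vector>

#include "absl/types/span.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h"

// Sketch: import only the "serving_default" signature of a v1 SavedModel.
// Passing an empty exported_names span would import every SignatureDef.
mlir::OwningModuleRef ImportServingSignature(mlir::MLIRContext* context) {
  const std::string saved_model_dir = "/tmp/my_saved_model";  // placeholder
  const std::unordered_set<std::string> tags = {"serve"};
  std::vector<std::string> exported_names = {"serving_default"};
  return tensorflow::SavedModelSignatureDefsToMlirImport(
      saved_model_dir, tags, absl::MakeSpan(exported_names), context);
}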
mlir::OwningModuleRef SavedModelSignatureDefsToMlirImport( absl::string_view saved_model_dir, - const std::unordered_set& tags, mlir::MLIRContext* context); + const std::unordered_set& tags, + absl::Span exported_names, mlir::MLIRContext* context); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc index 8212c0b50a4..06805e633e2 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.cc @@ -52,4 +52,11 @@ void BridgeLoggerConfig::printAfterIfEnabled(mlir::Pass* pass, Log(print_callback, pass, operation, "after"); } +void BridgeTimingConfig::printTiming(PrintCallbackFn printCallback) { + std::string name = "mlir_bridge_pass_timing.txt"; + std::unique_ptr os; + std::string filepath; + if (CreateFileForDumping(name, &os, &filepath).ok()) printCallback(*os); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h index b5b2ad33b31..eaf3a7c2598 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h @@ -44,6 +44,13 @@ class BridgeLoggerConfig : public mlir::PassManager::IRPrinterConfig { PrintCallbackFn print_callback) override; }; +// Logger for logging/dumping pass pipeline timings after completion. +class BridgeTimingConfig : public mlir::PassManager::PassTimingConfig { + public: + // Hook that control how/where is the output produced + void printTiming(PrintCallbackFn printCallback) override; +}; + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_BRIDGE_LOGGER_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index b891682366b..e8ca691f961 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -17,10 +17,13 @@ limitations under the License. #include "absl/types/optional.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project #include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/OpDefinition.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project @@ -35,6 +38,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" @@ -289,6 +293,12 @@ Status ConvertMLIRToXlaComputation( tf2xla.addPass(mlir::xla_hlo::createLegalizeTfWithTf2XlaPass(device_type)); tf2xla.addNestedPass(mlir::createCanonicalizerPass()); + // Run shape inference pass to propagate shapes through tensor_cast operations + // from static to dynamic shapes. This could be generated if the shape + // inference was originally missing in a TF op but the corresponding HLO op + // had static shape after lowering. + tf2xla.addPass(mlir::TF::CreateTFShapeInferencePass()); + // Run LegalizeTFPass again because the previous legalization passes can // expose more graph pruning and canonicalization opportunities that are // necessary for the second LegalizeTFPass(allow_partial_conversion=false) @@ -299,7 +309,7 @@ Status ConvertMLIRToXlaComputation( if (VLOG_IS_ON(1)) { // Print the whole module after each pass which requires disabling // multi-threading as well. - tf2xla.disableMultithreading(); + module_op.getContext()->disableMultithreading(); tf2xla.enableIRPrinting(std::make_unique( /*print_module_scope=*/true)); } @@ -393,14 +403,47 @@ Status CompileSerializedMlirToXlaHlo( std::move(custom_legalization_passes)); } +// Rewrites the given module with specified args. For each of the constant args, +// it gets inlined in the "main' function and the corresponding argument is +// removed from the signature. +// Returns the original indices for the other arguments on success. +static StatusOr> RewriteWithArgs( + mlir::ModuleOp module, llvm::ArrayRef args) { + mlir::FuncOp main_fn = module.lookupSymbol("main"); + std::vector params; + + auto builder = mlir::OpBuilder(main_fn.getBody()); + std::vector args_to_erase; + for (int idx = 0; idx < args.size(); idx++) { + const XlaCompiler::Argument& xla_arg = args[idx]; + mlir::BlockArgument mlir_arg = main_fn.getArgument(idx); + if (xla_arg.kind != XlaCompiler::Argument::kConstant) { + params.push_back(idx); + continue; + } + + TF_ASSIGN_OR_RETURN(auto value_attr, + ConvertTensor(xla_arg.constant_value, &builder)); + // TODO(hinsu): Use the actual location of the constant. 
+ auto constant = builder.create( + mlir::UnknownLoc::get(module.getContext()), value_attr); + mlir_arg.replaceAllUsesWith(constant); + args_to_erase.push_back(idx); + } + + for (int idx : llvm::reverse(args_to_erase)) main_fn.eraseArgument(idx); + return params; +} + Status CompileGraphToXlaHlo( - const Graph& graph, llvm::ArrayRef arg_shapes, + const Graph& graph, llvm::ArrayRef args, llvm::StringRef device_type, bool use_tuple_args, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, XlaCompiler::CompilationResult* compilation_result, std::vector> custom_legalization_passes) { RegisterDialects(); + mlir::MLIRContext context; GraphImportConfig config; config.graph_as_function = true; @@ -408,10 +451,19 @@ Status CompileGraphToXlaHlo( ConvertGraphToMlir(graph, debug_info, flib_def, config, &context); if (!module_or.ok()) return module_or.status(); - return CompileMlirToXlaHlo(module_or.ValueOrDie().get(), arg_shapes, - device_type, use_tuple_args, - shape_representation_fn, compilation_result, - std::move(custom_legalization_passes)); + mlir::ModuleOp module = module_or.ValueOrDie().get(); + TF_ASSIGN_OR_RETURN(std::vector remaining_params, + RewriteWithArgs(module, {args.data(), args.size()})); + llvm::SmallVector arg_shapes; + arg_shapes.reserve(args.size()); + for (unsigned idx : remaining_params) + arg_shapes.push_back(absl::get(args[idx].shape)); + + auto status = CompileMlirToXlaHlo( + module, arg_shapes, device_type, use_tuple_args, shape_representation_fn, + compilation_result, std::move(custom_legalization_passes)); + compilation_result->input_mapping = remaining_params; + return status; } } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index 0218efb83c6..24b60dcb346 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -71,7 +71,7 @@ Status CompileSerializedMlirToXlaHlo( // Same as the above but takes input as TensorFlow Graph. 
Status CompileGraphToXlaHlo( - const Graph& graph, llvm::ArrayRef arg_shapes, + const Graph& graph, llvm::ArrayRef args, llvm::StringRef device_type, bool use_tuple_args, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index 118af434629..91640aff437 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -455,8 +455,12 @@ TEST(CompileGraphToXlaHlo, Basic) { test::graph::Retval(&graph, 0, arg); XlaCompiler::CompilationResult result; + XlaCompiler::Argument compiler_arg; + compiler_arg.kind = XlaCompiler::Argument::kParameter; + compiler_arg.shape = TensorShape(); + TF_ASSERT_OK( - CompileGraphToXlaHlo(graph, /*arg_shapes=*/{TensorShape()}, "XLA_CPU_JIT", + CompileGraphToXlaHlo(graph, /*args=*/{compiler_arg}, "XLA_CPU_JIT", /*use_tuple_args=*/false, flib_def, GraphDebugInfo(), /*shape_representation_fn=*/nullptr, &result)); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc index 1c1d127d42f..b28f26b6c3c 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc @@ -31,12 +31,14 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/tstring.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -85,16 +87,22 @@ StatusOr ConvertFlatTensor(const Tensor& input_tensor, type, llvm::makeArrayRef(arr.data(), arr.size())); } -StatusOr ConvertBF16Tensor(const Tensor& input_tensor, - ShapedType type) { +ElementsAttr ConvertBf16Tensor(const Tensor& input_tensor, + RankedTensorType type) { auto flat = input_tensor.flat(); + llvm::SmallVector floats; + floats.reserve(flat.size()); + for (bfloat16 v : llvm::makeArrayRef(flat.data(), flat.size())) + floats.push_back(llvm::APFloat(static_cast(v))); + return mlir::DenseElementsAttr::get(type, llvm::makeArrayRef(floats)); +} - llvm::SmallVector flat_double; - flat_double.reserve(flat.size()); - for (bfloat16 v : llvm::makeArrayRef(flat.data(), flat.size())) { - flat_double.push_back(static_cast(v)); - } - return mlir::DenseElementsAttr::get(type, llvm::makeArrayRef(flat_double)); +ElementsAttr ConvertHalfTensor(const Tensor& tensor, RankedTensorType type) { + auto buffer = llvm::makeArrayRef(static_cast(tensor.data()), + tensor.TotalBytes()); + return mlir::DenseElementsAttr::getFromRawBuffer( + type, buffer, + /*isSplatBuffer=*/type.getNumElements() == 1); } StatusOr ConvertStringTensor(const Tensor& input_tensor, @@ -125,18 +133,28 @@ StatusOr ConvertTensor(const Tensor& input_tensor, case DTYPE: \ return 
ConvertFlatTensor(input_tensor, type); - // TODO(fengliuai): customize the conversions for more types. + // TODO(fengliuai): customize the conversions for quantized and string types. switch (input_dtype) { CONVERT_FLAT(DT_BOOL, bool) CONVERT_FLAT(DT_FLOAT, float) CONVERT_FLAT(DT_DOUBLE, double) + CONVERT_FLAT(DT_INT8, int8) + CONVERT_FLAT(DT_INT16, int16) CONVERT_FLAT(DT_INT32, int32) CONVERT_FLAT(DT_INT64, int64) + CONVERT_FLAT(DT_UINT8, uint8) + CONVERT_FLAT(DT_UINT16, uint16) + CONVERT_FLAT(DT_UINT32, uint32) + CONVERT_FLAT(DT_UINT64, uint64) + CONVERT_FLAT(DT_COMPLEX64, std::complex) + CONVERT_FLAT(DT_COMPLEX128, std::complex) // BFLOAT16 is a special case that it needs to be cast to double type to // match its storage type. case DT_BFLOAT16: - return ConvertBF16Tensor(input_tensor, type); + return ConvertBf16Tensor(input_tensor, type); + case DT_HALF: + return ConvertHalfTensor(input_tensor, type); case DT_STRING: return ConvertStringTensor(input_tensor, type); @@ -199,12 +217,20 @@ mlir::TF::ShapeAttr ConvertTypeToTensorShapeAttr(const mlir::Type& type) { // Converts an MLIR dense string elements attribute to a TensorFlow tensor // proto. -Status ConvertStringElementsAttr(const DenseStringElementsAttr attr, - TensorProto* output_tensor) { - for (const auto& val : attr.getRawStringData()) { - output_tensor->add_string_val(val.data(), val.size()); +void ConvertStringElementsAttr( + const DenseStringElementsAttr attr, + protobuf::RepeatedPtrField* output) { + for (const auto& val : attr.getRawStringData()) + output->Add({val.data(), val.size()}); +} + +template +void ConvertComplexElementsAttr(const mlir::DenseElementsAttr attr, + protobuf::RepeatedField* output) { + for (const auto& val : attr.getValues>()) { + output->Add(val.real()); + output->Add(val.imag()); } - return Status::OK(); } // Converts an MLIR opaque elements attribute to a TensorFlow tensor proto. @@ -218,139 +244,80 @@ Status ConvertOpaqueElementsAttr(const ElementsAttr attr, return InvalidArgument("Unexpected elements attribute type from MLIR."); } -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the double_val field updated. -Status ConvertDoubleElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_double_val(elts.getSplatValue()); - } else { - for (auto value : elts.getValues()) - output_tensor->add_double_val(value); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the float_val field updated. -Status ConvertFloatElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_float_val(elts.getSplatValue()); - } else { - for (auto value : elts.getValues()) - output_tensor->add_float_val(value); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the half_val field updated. 
-Status ConvertHalfElementsAttr(const ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_half_val( - (*elts.begin()).bitcastToAPInt().getSExtValue()); - } else { - for (const auto& value : elts.getFloatValues()) - output_tensor->add_half_val(value.bitcastToAPInt().getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the int_val field updated. -Status ConvertIntElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_int_val((*elts.begin()).getSExtValue()); - } else { - for (const auto& val : elts) - output_tensor->add_int_val(val.getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -Status ConvertBfloat16ElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - auto elts = attr.dyn_cast(); - if (!elts) { - return ConvertOpaqueElementsAttr(attr, output_tensor); - } - - // Bfloat16 is internally represented as `double` in MLIR. - if (elts.isSplat()) { - double v = elts.getSplatValue(); - bfloat16 bf16_val = static_cast(v); - output_tensor->add_half_val(absl::bit_cast(bf16_val)); +// Converts an MLIR elements attribute and adds it to specified repeated field. +template +void ConvertElementsAttr(const mlir::DenseElementsAttr attr, + protobuf::RepeatedField* output) { + if (attr.isSplat()) { + output->Add(attr.getSplatValue()); } else { - for (auto v : elts.getValues()) { + for (auto value : attr.getValues()) output->Add(value); + } +} + +// Converts an MLIR elements attribute containing half values and adds it to +// specified repeated field. +void ConvertHalfElementsAttr(const DenseFPElementsAttr attr, + protobuf::RepeatedField* output_tensor) { + if (attr.isSplat()) { + output_tensor->Add((*attr.begin()).bitcastToAPInt().getSExtValue()); + } else { + for (const llvm::APFloat value : attr.getFloatValues()) + output_tensor->Add(value.bitcastToAPInt().getSExtValue()); + } +} + +// Converts an MLIR elements attribute containing int values and adds it to +// specified repeated field. +void ConvertIntElementsAttr(const mlir::DenseIntElementsAttr attr, + protobuf::RepeatedField* output) { + if (attr.isSplat()) { + output->Add((*attr.begin()).getSExtValue()); + } else { + for (const llvm::APInt val : attr) output->Add(val.getSExtValue()); + } +} + +void ConvertBfloat16ElementsAttr(const mlir::DenseFPElementsAttr attr, + protobuf::RepeatedField* output) { + // Bfloat16 is internally represented as `double` in MLIR. + if (attr.isSplat()) { + double v = attr.getSplatValue(); + bfloat16 bf16_val = static_cast(v); + output->Add(absl::bit_cast(bf16_val)); + } else { + for (auto v : attr.getValues()) { bfloat16 bf16_val = static_cast(v); - output_tensor->add_half_val(absl::bit_cast(bf16_val)); + output->Add(absl::bit_cast(bf16_val)); } } - - return Status::OK(); } -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with the int64_val field updated. 
-Status ConvertInt64ElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - if (elts.isSplat()) { - output_tensor->add_int64_val((*elts.begin()).getSExtValue()); - } else { - for (const auto& val : elts) - output_tensor->add_int64_val(val.getSExtValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -// Converts an MLIR elements attribute to a TensorFlow tensor proto -// with bool_val field updated. -Status ConvertBoolElementsAttr(const mlir::ElementsAttr attr, - TensorProto* output_tensor) { - if (auto elts = attr.dyn_cast()) { - for (const auto& val : elts) { - output_tensor->add_bool_val(val.getBoolValue()); - } - return Status::OK(); - } - return ConvertOpaqueElementsAttr(attr, output_tensor); -} - -Status ConvertToTensorProto(const ElementsAttr attr, - TensorProto* output_tensor) { +Status ConvertToTensorProto(const ElementsAttr attr, TensorProto* output) { auto type = attr.getType(); auto shape = type.getShape(); DataType output_dtype; TF_RETURN_IF_ERROR(ConvertToDataType(type, &output_dtype)); - output_tensor->set_dtype(output_dtype); - ConvertToTensorShapeProto(shape, output_tensor->mutable_tensor_shape()); + output->set_dtype(output_dtype); + ConvertToTensorShapeProto(shape, output->mutable_tensor_shape()); + + if (attr.isa()) + return ConvertOpaqueElementsAttr(attr.cast(), output); + + auto dense_attr = attr.dyn_cast(); + if (!dense_attr) return errors::InvalidArgument("Unsupported elements attr"); switch (output_dtype) { case DT_FLOAT: - return ConvertFloatElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_float_val()); + break; case DT_HALF: - // Handles both DenseFPElementsAttr and OpaqueElementsAttr. 
- return ConvertHalfElementsAttr(attr, output_tensor); + ConvertHalfElementsAttr(dense_attr.cast(), + output->mutable_half_val()); + break; case DT_DOUBLE: - return ConvertDoubleElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_double_val()); + break; case DT_QUINT8: case DT_UINT8: case DT_INT8: @@ -358,20 +325,40 @@ Status ConvertToTensorProto(const ElementsAttr attr, case DT_UINT16: case DT_INT16: case DT_INT32: - return ConvertIntElementsAttr(attr, output_tensor); + ConvertIntElementsAttr(dense_attr.cast(), + output->mutable_int_val()); + break; + case DT_UINT32: + ConvertElementsAttr(dense_attr, output->mutable_uint32_val()); + break; + case DT_UINT64: + ConvertElementsAttr(dense_attr, output->mutable_uint64_val()); + break; case DT_INT64: - return ConvertInt64ElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_int64_val()); + break; case DT_BOOL: - return ConvertBoolElementsAttr(attr, output_tensor); + ConvertElementsAttr(dense_attr, output->mutable_bool_val()); + break; case DT_BFLOAT16: - return ConvertBfloat16ElementsAttr(attr, output_tensor); + ConvertBfloat16ElementsAttr(dense_attr.cast(), + output->mutable_half_val()); + break; case DT_STRING: - return ConvertStringElementsAttr(attr.cast(), - output_tensor); + ConvertStringElementsAttr(dense_attr.cast(), + output->mutable_string_val()); + break; + case DT_COMPLEX64: + ConvertComplexElementsAttr(dense_attr, output->mutable_scomplex_val()); + break; + case DT_COMPLEX128: + ConvertComplexElementsAttr(dense_attr, output->mutable_dcomplex_val()); + break; default: - return ConvertOpaqueElementsAttr(attr.cast(), - output_tensor); + return errors::Unimplemented(absl::StrCat("Unimplemented data type ", + DataTypeString(output_dtype))); } + return Status::OK(); } Status ConvertToTensor(const mlir::ElementsAttr attr, Tensor* output_tensor) { diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc index 673b692b4e6..bf96e3d1df4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include +#include #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project @@ -23,6 +24,8 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -79,7 +82,7 @@ TEST(ConvertTypeToTensorTypeTest, ConvertStringTensor) { mlir::Builder b(&context); // Create the sample tensor to convert. 
- tensorflow::Tensor tensor(DT_STRING, TensorShape({1, 2, 2, 1})); + Tensor tensor(DT_STRING, TensorShape({1, 2, 2, 1})); EXPECT_EQ(4, tensor.NumElements()); auto Tt = tensor.flat(); Tt.setValues({"one", "two", "three", "four"}); @@ -97,5 +100,75 @@ TEST(ConvertTypeToTensorTypeTest, ConvertStringTensor) { EXPECT_EQ(string_values[3], mlir::StringRef("four")); } +class ConvertTensorTest : public ::testing::Test { + protected: + template + void VerifyConversion(std::initializer_list values, DataType dtype, + mlir::Type expected_ty) { + mlir::Builder b(expected_ty.getContext()); + Tensor tensor(dtype, TensorShape({static_cast(values.size())})); + tensor.flat().setValues(values); + + auto value_or = ConvertTensor(tensor, &b); + TF_ASSERT_OK(value_or.status()); + auto attr = value_or.ValueOrDie(); + + EXPECT_EQ(attr.getType().getElementType(), expected_ty); + + Tensor out; + TF_ASSERT_OK(ConvertToTensor(attr, &out)); + + test::ExpectTensorEqual(tensor, out); + } +}; + +TEST_F(ConvertTensorTest, Simple) { + RegisterDialects(); + + mlir::MLIRContext context; + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {Eigen::half(1.0)}, DT_HALF, mlir::FloatType::getF16(&context))); + ASSERT_NO_FATAL_FAILURE( + VerifyConversion({bfloat16(1.0), bfloat16(-1.0)}, DT_BFLOAT16, + mlir::FloatType::getBF16(&context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1.0, -1.0}, DT_FLOAT, mlir::FloatType::getF32(&context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1.0, -1.0}, DT_DOUBLE, mlir::FloatType::getF64(&context))); + + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT8, mlir::IntegerType::get(8, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT16, mlir::IntegerType::get(16, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT32, mlir::IntegerType::get(32, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, -1}, DT_INT64, mlir::IntegerType::get(64, &context))); + + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT8, + mlir::IntegerType::get( + 8, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT16, + mlir::IntegerType::get( + 16, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT32, + mlir::IntegerType::get( + 32, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion( + {1, 2}, DT_UINT64, + mlir::IntegerType::get( + 64, mlir::IntegerType::SignednessSemantics::Unsigned, &context))); + + ASSERT_NO_FATAL_FAILURE(VerifyConversion>( + {{0.0, 1.0}, {1.0, 0.0}}, DT_COMPLEX64, + mlir::ComplexType::get(mlir::FloatType::getF32(&context)))); + ASSERT_NO_FATAL_FAILURE(VerifyConversion>( + {{0.0, 1.0}, {1.0, 0.0}}, DT_COMPLEX128, + mlir::ComplexType::get(mlir::FloatType::getF64(&context)))); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/dump_graph.cc b/tensorflow/compiler/mlir/tensorflow/utils/dump_graph.cc index ffcd1f71a50..c77107c8de7 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/dump_graph.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/dump_graph.cc @@ -24,8 +24,8 @@ limitations under the License. 
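// Editorial illustration (not part of the dump_graph.cc hunk that follows): the
// Tensor <-> ElementsAttr round trip exercised by ConvertTensorTest above,
// shown standalone for the newly supported DT_HALF path. A sketch only; the
// caller is assumed to supply an MLIRContext, and the helper name is
// illustrative.
#include "mlir/IR/Builders.h"
#include "mlir/IR/MLIRContext.h"
#include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/status.h"

tensorflow::Status RoundTripHalfTensor(mlir::MLIRContext* context) {
  mlir::Builder builder(context);
  tensorflow::Tensor in(tensorflow::DT_HALF, tensorflow::TensorShape({2}));
  in.flat<Eigen::half>().setValues({Eigen::half(1.0), Eigen::half(-1.0)});

  // Tensor -> DenseElementsAttr; DT_HALF now takes the raw-buffer path added in
  // ConvertHalfTensor.
  auto attr_or = tensorflow::ConvertTensor(in, &builder);
  if (!attr_or.ok()) return attr_or.status();

  // ElementsAttr -> TensorProto -> Tensor via the half_val repeated field.
  tensorflow::Tensor out;
  return tensorflow::ConvertToTensor(attr_or.ValueOrDie(), &out);
}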
#include "llvm/ADT/Twine.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "mlir/Analysis/Verifier.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Verifier.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/core/platform/env.h" diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index cc795259893..4877cbc4a44 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -59,6 +59,18 @@ limitations under the License. namespace tensorflow { namespace { +// static TensorFlow op prefix set. +std::set* GlobalOpPrefixes() { + static std::set* global_op_prefixes = [] { + std::set* result = new std::set; + result->insert("tf."); + result->insert("_tf."); + result->insert("tf_executor."); + return result; + }(); + return global_op_prefixes; +} + // Converts a location to the debug information for the node def. Status ConvertLocation(mlir::Location inst_loc, NodeDef::ExperimentalDebugInfo* debug_info) { @@ -268,8 +280,10 @@ StatusOr GetTensorFlowOpName(llvm::StringRef op_name) { // - ".sink" or ".Sink": only the NextIteration operation has this suffix. We // don't need to consider ".source"/".Source" because the nodes with this // suffix are skipped by the caller and will not be added to the graph. - if (!op_name.consume_front("_tf.") && !op_name.consume_front("tf.") && - !op_name.consume_front("tf_executor.")) { + auto prefixes = GlobalOpPrefixes(); + if (std::none_of(prefixes->begin(), prefixes->end(), [&](std::string prefix) { + return op_name.consume_front(prefix); + })) { return errors::FailedPrecondition("op node '", op_name.str(), "' was not a TF op!"); } @@ -506,4 +520,9 @@ bool IsLegacyCallInstruction(mlir::Operation* inst) { inst->getName().getStringRef().compare("_tf.LegacyCall") == 0; } +Status AddTensorFlowOpPrefix(std::string prefix) { + GlobalOpPrefixes()->insert(prefix); + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h index 32ed528bd0d..58fe39fa4e8 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h @@ -34,10 +34,17 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/stream_executor/lib/statusor.h" +namespace mlir { +class ShapedType; +} // namespace mlir + namespace tensorflow { using stream_executor::port::StatusOr; +// Add custom op prefix for TensorFlow dialects. +Status AddTensorFlowOpPrefix(std::string); + // Maps an MLIR op name in the TensorFlow dialect or the TensorFlow control // dialect back into a TensorFlow valid op name. 
StatusOr GetTensorFlowOpName(llvm::StringRef); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/import_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/import_utils.cc index 47c5d27767d..3d16352f78e 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/import_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/import_utils.cc @@ -31,12 +31,17 @@ inline llvm::StringRef StringViewToRef(absl::string_view view) { } } // namespace -Status LoadProtoFromBuffer(absl::string_view input, - protobuf::MessageLite* proto) { +Status LoadProtoFromBuffer(absl::string_view input, protobuf::Message* proto) { // Attempt to parse as text. if (ParseTextProto(input, "", proto).ok()) return Status::OK(); // Else attempt to parse as binary. + return LoadProtoFromBuffer(input, static_cast(proto)); +} + +Status LoadProtoFromBuffer(absl::string_view input, + protobuf::MessageLite* proto) { + // Attempt to parse as binary. protobuf::io::ArrayInputStream binary_stream(input.data(), input.size()); if (proto->ParseFromZeroCopyStream(&binary_stream)) return Status::OK(); @@ -44,8 +49,8 @@ Status LoadProtoFromBuffer(absl::string_view input, return errors::InvalidArgument("Could not parse input proto"); } -Status LoadProtoFromFile(absl::string_view input_filename, - protobuf::MessageLite* proto) { +template +Status LoadProtoFromFileImpl(absl::string_view input_filename, T* proto) { const auto file_or_err = llvm::MemoryBuffer::getFileOrSTDIN(StringViewToRef(input_filename)); if (std::error_code error = file_or_err.getError()) { @@ -60,4 +65,14 @@ Status LoadProtoFromFile(absl::string_view input_filename, return LoadProtoFromBuffer(content, proto); } +Status LoadProtoFromFile(absl::string_view input_filename, + protobuf::Message* proto) { + return LoadProtoFromFileImpl(input_filename, proto); +} + +Status LoadProtoFromFile(absl::string_view input_filename, + protobuf::MessageLite* proto) { + return LoadProtoFromFileImpl(input_filename, proto); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/import_utils.h b/tensorflow/compiler/mlir/tensorflow/utils/import_utils.h index 56cd188f393..ad1531dd449 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/import_utils.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/import_utils.h @@ -24,13 +24,20 @@ namespace tensorflow { // Reads text (.pbtext) or binary (.pb) format of a proto message from the given // buffer. Returns error status of the file is not found or malformed proto. +// Note that text protos can only be parsed when full protobuf::Message protos +// are used, and will fail for protobuf::MessageLite protos. +Status LoadProtoFromBuffer(absl::string_view input, protobuf::Message* proto); Status LoadProtoFromBuffer(absl::string_view input, - tensorflow::protobuf::MessageLite* proto); + protobuf::MessageLite* proto); // Reads text (.pbtext) or binary (.pb) format of a proto message from the given // file path. Returns error status of the file is not found or malformed proto. +// Note that text protos can only be parsed when full protobuf::Message protos +// are used, and will fail for protobuf::MessageLite protos. 
Status LoadProtoFromFile(absl::string_view input_filename, - tensorflow::protobuf::MessageLite* proto); + protobuf::Message* proto); +Status LoadProtoFromFile(absl::string_view input_filename, + protobuf::MessageLite* proto); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.cc b/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.cc index b616d34fdd8..1bf615de8c4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.cc @@ -24,7 +24,6 @@ limitations under the License. namespace tensorflow { -#ifndef TENSORFLOW_LITE_PROTOS namespace { // Error collector that simply ignores errors reported. class NoOpErrorCollector : public protobuf::io::ErrorCollector { @@ -32,7 +31,6 @@ class NoOpErrorCollector : public protobuf::io::ErrorCollector { void AddError(int line, int column, const std::string& message) override {} }; } // namespace -#endif // TENSORFLOW_LITE_PROTOS Status ConsumePrefix(absl::string_view str, absl::string_view prefix, absl::string_view* output) { @@ -45,8 +43,7 @@ Status ConsumePrefix(absl::string_view str, absl::string_view prefix, Status ParseTextProto(absl::string_view text_proto, absl::string_view prefix_to_strip, - protobuf::MessageLite* parsed_proto) { -#ifndef TENSORFLOW_LITE_PROTOS + protobuf::Message* parsed_proto) { protobuf::TextFormat::Parser parser; // Don't produce errors when attempting to parse text format as it would fail // when the input is actually a binary file. @@ -60,15 +57,11 @@ Status ParseTextProto(absl::string_view text_proto, } protobuf::io::ArrayInputStream input_stream(text_proto_without_prefix.data(), text_proto_without_prefix.size()); - if (parser.Parse(&input_stream, - tensorflow::down_cast(parsed_proto))) { + if (parser.Parse(&input_stream, parsed_proto)) { return Status::OK(); } parsed_proto->Clear(); return errors::InvalidArgument("Could not parse text proto: ", text_proto); -#else - return errors::Unavailable("Cannot parse text protos on mobile."); -#endif // TENSORFLOW_LITE_PROTOS } } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.h b/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.h index 5646f1378af..c1f1e3b111d 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/parse_text_proto.h @@ -32,7 +32,12 @@ Status ConsumePrefix(absl::string_view str, absl::string_view prefix, // proto. Status ParseTextProto(absl::string_view text_proto, absl::string_view prefix_to_strip, - protobuf::MessageLite* parsed_proto); + protobuf::Message* parsed_proto); +inline Status ParseTextProto(absl::string_view /* text_proto */, + absl::string_view /* prefix_to_strip */, + protobuf::MessageLite* /* parsed_proto */) { + return errors::Unavailable("Cannot parse text protos on mobile."); +} } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc index 6cf2781e48d..06c10c26835 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc @@ -164,12 +164,19 @@ std::string GetTPUCompilationDevice(Device system_device) { return DeviceNameUtils::ParsedNameToString(system_device); } +// Finds the host CPU device for a given TPU device. 
+std::string GetCPUHostDeviceForTPUDevice(Device tpu_device) { + tpu_device.type = DEVICE_CPU; + tpu_device.id = 0; + return DeviceNameUtils::ParsedNameToString(tpu_device); +} + // Determines execution devices when topology and device assignment are not // defined. This is a special case where a single core computation is replicated // to every core in the mesh. TPU devices are simply added to // `execution_devices` of one replica. `num_replicas` must be 1 or the total // number of TPU devices available, and `num_cores_per_replica` must be 1. -StatusOr GetFullMeshTPUExecutionDeviceAssignment( +StatusOr GetFullMeshTPUExecutionDeviceAssignment( int num_replicas, int num_cores_per_replica, llvm::ArrayRef> tpu_devices) { const int num_tasks = tpu_devices.size(); @@ -185,17 +192,18 @@ StatusOr GetFullMeshTPUExecutionDeviceAssignment( "'num_cores_per_replica' must be equal to 1, got ", num_cores_per_replica); - ExecutionDevices execution_devices; - execution_devices.reserve(num_replicas); + TPUDevicesAndHosts devices_and_hosts; + devices_and_hosts.reserve(num_replicas); for (int i = 0; i < num_replicas; ++i) { const int task = i / num_tpus_per_task; const int device = i % num_tpus_per_task; - execution_devices.push_back( - {tensorflow::DeviceNameUtils::ParsedNameToString( - tpu_devices[task][device])}); + const auto& tpu_device = tpu_devices[task][device]; + devices_and_hosts.push_back({TPUDeviceAndHost( + /*device=*/tensorflow::DeviceNameUtils::ParsedNameToString(tpu_device), + /*host=*/GetCPUHostDeviceForTPUDevice(tpu_device))}); } - return execution_devices; + return devices_and_hosts; } // Helper struct for keeping track of task and device for an associated TPU @@ -326,7 +334,7 @@ StatusOr> ParseTopologyAttr( // - number of device coordinates (in tuple 3) match number 'num_replicas' * // 'num_cores_per_replica' // - a TPU device associated with each device coordinate -StatusOr> +StatusOr> GetGeneralTPUExecutionDeviceAssignment( int num_replicas, int num_cores_per_replica, llvm::ArrayRef> tpu_devices, @@ -361,9 +369,9 @@ GetGeneralTPUExecutionDeviceAssignment( std::vector used_device_ids( location_to_id(bound_x - 1, bound_y - 1, bound_z - 1, bound_core - 1), false); - ExecutionDevices execution_devices( - num_replicas, - llvm::SmallVector(num_cores_per_replica, "")); + TPUDevicesAndHosts devices_and_hosts( + num_replicas, llvm::SmallVector( + num_cores_per_replica, TPUDeviceAndHost())); xla::DeviceAssignment device_assignment(num_replicas, num_cores_per_replica); int pos = 0; for (int replica = 0; replica < num_replicas; ++replica) { @@ -393,16 +401,18 @@ GetGeneralTPUExecutionDeviceAssignment( used_device_ids[device_id] = true; device_assignment(replica, logical_core) = device_id; - execution_devices[replica][logical_core] = - DeviceNameUtils::ParsedNameToString(tpu_devices[task][device]); + auto& device_and_host = devices_and_hosts[replica][logical_core]; + const auto& tpu_device = tpu_devices[task][device]; + device_and_host.device = DeviceNameUtils::ParsedNameToString(tpu_device); + device_and_host.host = GetCPUHostDeviceForTPUDevice(tpu_device); } } xla::DeviceAssignmentProto device_assignment_proto; TF_RETURN_IF_ERROR(device_assignment.Serialize(&device_assignment_proto)); - return std::pair( - std::move(execution_devices), std::move(device_assignment_proto)); + return std::pair( + std::move(devices_and_hosts), std::move(device_assignment_proto)); } } // anonymous namespace diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h 
b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h index dd296a13f4b..5fdb6b8768b 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.h @@ -30,29 +30,40 @@ limitations under the License. namespace tensorflow { using stream_executor::port::StatusOr; -// TPU devices to be used for execution (e.g. devices for TPUExecute ops). They -// are ordered by `num_replicas` followed by `num_cores_per_replica`. -using ExecutionDevices = - llvm::SmallVector, 8>; +// A TPU device for execution alongside its associated host CPU device. +struct TPUDeviceAndHost { + TPUDeviceAndHost() {} + TPUDeviceAndHost(llvm::StringRef device, llvm::StringRef host) + : device(device), host(host) {} -// TPU compilation device, execution devices, and optionally execution device -// IDs. Execution device IDs are populated if `topology` and `device_assignment` -// are provided. + std::string device; + std::string host; +}; + +// TPU devices to be used for execution (e.g. devices for TPUExecute ops) and +// their associated host CPU devices (for outside compilation). They are ordered +// by `num_replicas` followed by `num_cores_per_replica`. +using TPUDevicesAndHosts = + llvm::SmallVector, 8>; + +// TPU compilation device, execution and associated host devices, and optionally +// execution device IDs. Execution device IDs are populated if `topology` and +// `device_assignment` are provided. struct TPUDeviceAssignment { TPUDeviceAssignment(llvm::StringRef compilation_device, - ExecutionDevices&& execution_devices) + TPUDevicesAndHosts&& tpu_devices) : compilation_device(compilation_device), - execution_devices(std::move(execution_devices)) {} + tpu_devices(std::move(tpu_devices)) {} TPUDeviceAssignment(llvm::StringRef compilation_device, - ExecutionDevices&& execution_devices, + TPUDevicesAndHosts&& tpu_devices, xla::DeviceAssignmentProto&& xla_device_assignment) : compilation_device(compilation_device), - execution_devices(std::move(execution_devices)), + tpu_devices(std::move(tpu_devices)), xla_device_assignment(std::move(xla_device_assignment)) {} std::string compilation_device; - ExecutionDevices execution_devices; + TPUDevicesAndHosts tpu_devices; llvm::Optional xla_device_assignment; }; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc index 87319f2adeb..7ac5635a6e4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc @@ -323,30 +323,46 @@ TEST(TPURewriteDeviceUtilTest, ValidFullMeshDeviceAssignment) { TF_ASSERT_OK(status_or.status()); - auto& tpu_device_assignment = status_or.ValueOrDie(); + const auto& tpu_device_assignment = status_or.ValueOrDie(); EXPECT_EQ(tpu_device_assignment.compilation_device, "/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 8); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 1); + const auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 8); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 1); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:0/device:TPU:0"); - 
EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:0/device:TPU:1"); - EXPECT_EQ(execution_devices[2][0], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][0].device, "/job:worker/replica:0/task:0/device:TPU:2"); - EXPECT_EQ(execution_devices[3][0], + EXPECT_EQ(tpu_devices[2][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][0].device, "/job:worker/replica:0/task:0/device:TPU:3"); - EXPECT_EQ(execution_devices[4][0], + EXPECT_EQ(tpu_devices[3][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[4][0].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[5][0], + EXPECT_EQ(tpu_devices[4][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[5][0].device, "/job:worker/replica:0/task:1/device:TPU:1"); - EXPECT_EQ(execution_devices[6][0], + EXPECT_EQ(tpu_devices[5][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[6][0].device, "/job:worker/replica:0/task:1/device:TPU:2"); - EXPECT_EQ(execution_devices[7][0], + EXPECT_EQ(tpu_devices[6][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[7][0].device, "/job:worker/replica:0/task:1/device:TPU:3"); + EXPECT_EQ(tpu_devices[7][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); EXPECT_FALSE(tpu_device_assignment.xla_device_assignment.hasValue()); } @@ -410,30 +426,46 @@ TEST(TPURewriteDeviceUtilTest, ValidGeneralDeviceAssignmentMesh2x2x2) { TF_ASSERT_OK(status_or.status()); - auto& tpu_device_assignment = status_or.ValueOrDie(); + const auto& tpu_device_assignment = status_or.ValueOrDie(); EXPECT_EQ(tpu_device_assignment.compilation_device, "/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 4); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 2); + const auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 4); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 2); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:0/device:TPU:0"); - EXPECT_EQ(execution_devices[0][1], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][1].device, "/job:worker/replica:0/task:1/device:TPU:3"); - EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:0/device:TPU:1"); - EXPECT_EQ(execution_devices[1][1], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][1].device, "/job:worker/replica:0/task:1/device:TPU:2"); - EXPECT_EQ(execution_devices[2][0], + EXPECT_EQ(tpu_devices[1][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][0].device, "/job:worker/replica:0/task:0/device:TPU:3"); - EXPECT_EQ(execution_devices[2][1], + EXPECT_EQ(tpu_devices[2][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[2][1].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[3][0], + 
EXPECT_EQ(tpu_devices[2][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][0].device, "/job:worker/replica:0/task:0/device:TPU:2"); - EXPECT_EQ(execution_devices[3][1], + EXPECT_EQ(tpu_devices[3][0].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[3][1].device, "/job:worker/replica:0/task:1/device:TPU:1"); + EXPECT_EQ(tpu_devices[3][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); auto& xla_device_assignment = tpu_device_assignment.xla_device_assignment; ASSERT_TRUE(xla_device_assignment.hasValue()); @@ -511,23 +543,35 @@ TEST(TPURewriteDeviceUtilTest, ValidGeneralDeviceAssignmentMesh1x2x1x3) { EXPECT_EQ(tpu_device_assignment.compilation_device, "/job:worker/replica:0/task:0/device:CPU:0"); - auto& execution_devices = tpu_device_assignment.execution_devices; - ASSERT_EQ(execution_devices.size(), 2); - for (const auto& replica_execution_device : execution_devices) - ASSERT_EQ(replica_execution_device.size(), 3); + auto& tpu_devices = tpu_device_assignment.tpu_devices; + ASSERT_EQ(tpu_devices.size(), 2); + for (const auto& replica_tpu_devices : tpu_devices) + ASSERT_EQ(replica_tpu_devices.size(), 3); - EXPECT_EQ(execution_devices[0][0], + EXPECT_EQ(tpu_devices[0][0].device, "/job:worker/replica:0/task:1/device:TPU:1"); - EXPECT_EQ(execution_devices[0][1], + EXPECT_EQ(tpu_devices[0][0].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][1].device, "/job:worker/replica:0/task:1/device:TPU:0"); - EXPECT_EQ(execution_devices[0][2], + EXPECT_EQ(tpu_devices[0][1].host, + "/job:worker/replica:0/task:1/device:CPU:0"); + EXPECT_EQ(tpu_devices[0][2].device, "/job:worker/replica:0/task:2/device:TPU:0"); - EXPECT_EQ(execution_devices[1][0], + EXPECT_EQ(tpu_devices[0][2].host, + "/job:worker/replica:0/task:2/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][0].device, "/job:worker/replica:0/task:2/device:TPU:1"); - EXPECT_EQ(execution_devices[1][1], + EXPECT_EQ(tpu_devices[1][0].host, + "/job:worker/replica:0/task:2/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][1].device, "/job:worker/replica:0/task:0/device:TPU:0"); - EXPECT_EQ(execution_devices[1][2], + EXPECT_EQ(tpu_devices[1][1].host, + "/job:worker/replica:0/task:0/device:CPU:0"); + EXPECT_EQ(tpu_devices[1][2].device, "/job:worker/replica:0/task:0/device:TPU:1"); + EXPECT_EQ(tpu_devices[1][2].host, + "/job:worker/replica:0/task:0/device:CPU:0"); auto& xla_device_assignment = tpu_device_assignment.xla_device_assignment; ASSERT_TRUE(xla_device_assignment.hasValue()); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc index aef336330e0..083a5abf840 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc @@ -202,23 +202,23 @@ mlir::LogicalResult HandleTileShardedInputs( } // namespace mlir::LogicalResult ExtractInputsForLogicalDevices( - const int num_cores_per_replica, mlir::tf_device::LaunchFuncOp launch_func, - mlir::OpBuilder* builder, + const int num_cores_per_replica, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::OpBuilder* builder, llvm::SmallVectorImpl>* input_list) { // Initialize the input list for each logical devices. 
input_list->reserve(num_cores_per_replica); for (int i = 0; i < num_cores_per_replica; ++i) input_list->emplace_back(llvm::SmallVector()); - llvm::SmallVector launch_func_inputs( - launch_func.getOperands()); + llvm::SmallVector cluster_func_inputs( + cluster_func.getOperands()); auto sharding_attrs = - launch_func.getOperation()->getAttrOfType( + cluster_func.getOperation()->getAttrOfType( kInputShardingAttr); // If sharding attribute does not exist, then all inputs are placed on 0th // logical core by default. if (!sharding_attrs) { - (*input_list)[0] = launch_func_inputs; + (*input_list)[0] = cluster_func_inputs; return mlir::success(); } @@ -229,7 +229,7 @@ mlir::LogicalResult ExtractInputsForLogicalDevices( for (const auto& sharding_attr_and_index : llvm::enumerate(sharding_attrs)) { const auto& sharding_attr = sharding_attr_and_index.value(); const auto input_index = sharding_attr_and_index.index(); - const auto& input_value = launch_func_inputs[input_index]; + const auto& input_value = cluster_func_inputs[input_index]; xla::OpSharding sharding; sharding.ParseFromString( @@ -239,11 +239,11 @@ mlir::LogicalResult ExtractInputsForLogicalDevices( if (input_sharding_type == xla::OpSharding::OTHER) { llvm::SmallVector tiled_inputs; auto result = HandleTileShardedInputs( - launch_func.getLoc(), sharding, input_value, builder, &tiled_inputs); + cluster_func.getLoc(), sharding, input_value, builder, &tiled_inputs); if (mlir::failed(result)) return mlir::failure(); if (tiled_inputs.size() != num_cores_per_replica) - launch_func.emitError(llvm::formatv( + cluster_func.emitError(llvm::formatv( "incorrect {0}-th tiled input sharding received. " "Product of tile sharding splits({1}) must be equal to " "number of logical devices : {2}", @@ -265,36 +265,37 @@ mlir::LogicalResult ExtractInputsForLogicalDevices( } mlir::LogicalResult ParseAndValidateOutputSharding( - const int num_cores_per_replica, mlir::tf_device::LaunchFuncOp launch_func, + const int num_cores_per_replica, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::SmallVector* output_sharding_list) { - output_sharding_list->reserve(launch_func.getNumResults()); + output_sharding_list->reserve(cluster_func.getNumResults()); const auto output_sharding_attrs = - launch_func.getOperation()->getAttrOfType( + cluster_func.getOperation()->getAttrOfType( kOutputShardingAttr); if (!output_sharding_attrs) - return launch_func.emitError( - "output_sharding_configuration missing from launch func"); + return cluster_func.emitError( + "output_sharding_configuration missing from cluster func"); - if (output_sharding_attrs.size() != launch_func.getNumResults()) - return launch_func.emitError("incorrect number of output sharding"); + if (output_sharding_attrs.size() != cluster_func.getNumResults()) + return cluster_func.emitError("incorrect number of output sharding"); for (auto output_sharding_and_index : llvm::enumerate(output_sharding_attrs)) { const auto& output_sharding = output_sharding_and_index.value(); const int sharding_index = output_sharding_and_index.index(); if (!output_sharding.isa()) - return launch_func.emitError(llvm::formatv( + return cluster_func.emitError(llvm::formatv( "non-string output sharding at index {0}", sharding_index)); xla::OpSharding sharding; if (!sharding.ParseFromString( output_sharding.cast().getValue().str())) - return launch_func.emitError("incorrect sharding format for outputs"); + return cluster_func.emitError("incorrect sharding format for outputs"); if (sharding.type() == xla::OpSharding::OTHER && 
sharding.tile_assignment_devices_size() != num_cores_per_replica) - return launch_func.emitError(llvm::formatv( + return cluster_func.emitError(llvm::formatv( "incorrect sharding format for outputs. Number of " "tiled outputs({0}) must match the number of logical " "devices({1})", @@ -303,7 +304,7 @@ mlir::LogicalResult ParseAndValidateOutputSharding( if (sharding.type() == xla::OpSharding::MAXIMAL && ((sharding.tile_assignment_devices(0) >= num_cores_per_replica) || (sharding.tile_assignment_devices(0) < 0))) - return launch_func.emitError(llvm::formatv( + return cluster_func.emitError(llvm::formatv( "incorrect sharding format for outputs. Maximal " "sharding should be assigned to device id in range " "[0, {0}). Currently assigned to {1}", @@ -323,15 +324,15 @@ bool IsAssignedToLogicalDevice(const int core_id, } // Returns the index of the return value of region in -// `tf_device.parallel_execute` that represents launch func output at -// index |launch_func_output_index|. Regions of parallel_execute may +// `tf_device.parallel_execute` that represents cluster func output at +// index |cluster_func_output_index|. Regions of parallel_execute may // have different return values depending on outside sharding // configuration. -int MapLaunchOutputIndexWithRegionOutputIndex( +int MapClusterOutputIndexWithRegionOutputIndex( llvm::ArrayRef output_sharding_config, const int core_id, - const int launch_func_output_index) { + const int cluster_func_output_index) { int region_output_index = 0; - for (int output_index = 0; output_index < launch_func_output_index; + for (int output_index = 0; output_index < cluster_func_output_index; ++output_index) { const auto& sharding = output_sharding_config[output_index]; if (sharding.type() != xla::OpSharding::MAXIMAL || @@ -344,8 +345,8 @@ int MapLaunchOutputIndexWithRegionOutputIndex( // Merges outputs from TPU computation for tile-sharded outputs. 
mlir::LogicalResult HandleTileShardedOutputs( - const int launch_func_output_index, const xla::OpSharding& sharding, - const mlir::Location& location, mlir::Value launch_func_output, + const int cluster_func_output_index, const xla::OpSharding& sharding, + const mlir::Location& location, mlir::Value cluster_func_output, mlir::tf_device::ParallelExecuteOp parallel_execute, mlir::OpBuilder* builder) { // Inject concat ops after parallel_execute to merge outputs from @@ -357,8 +358,8 @@ mlir::LogicalResult HandleTileShardedOutputs( llvm::SmallVector outputs_to_merge; outputs_to_merge.reserve(sharding.tile_assignment_devices_size()); for (const auto logical_device_id : sharding.tile_assignment_devices()) { - const int region_output_index = MapLaunchOutputIndexWithRegionOutputIndex( - sharding, logical_device_id, launch_func_output_index); + const int region_output_index = MapClusterOutputIndexWithRegionOutputIndex( + sharding, logical_device_id, cluster_func_output_index); const auto output_from_logical_device = parallel_execute.GetRegionOutputs( logical_device_id)[region_output_index]; outputs_to_merge.emplace_back(output_from_logical_device); @@ -393,30 +394,30 @@ mlir::LogicalResult HandleTileShardedOutputs( } assert(outputs_to_merge.size() == 1); - launch_func_output.replaceAllUsesWith(outputs_to_merge[0]); + cluster_func_output.replaceAllUsesWith(outputs_to_merge[0]); return mlir::success(); } mlir::LogicalResult ValidateAndGetTiledExecuteOutputShape( const mlir::Location& location, - const mlir::TensorType launch_func_output_type, + const mlir::TensorType cluster_func_output_type, const xla::OpSharding& output_sharding, mlir::Type* tiled_logical_computation_type) { auto new_output_shape = - llvm::to_vector<4>(launch_func_output_type.getShape()); + llvm::to_vector<4>(cluster_func_output_type.getShape()); for (auto dimension_and_output_splits : llvm::enumerate(output_sharding.tile_assignment_dimensions())) { const auto dimension_index = dimension_and_output_splits.index(); const auto output_splits = dimension_and_output_splits.value(); - const auto& output_shape = launch_func_output_type.getShape(); + const auto output_shape = cluster_func_output_type.getShape(); if (output_shape[dimension_index] == mlir::ShapedType::kDynamicSize) { - *tiled_logical_computation_type = launch_func_output_type; + *tiled_logical_computation_type = cluster_func_output_type; break; } auto output_shape_at_dim = - launch_func_output_type.getShape()[dimension_index]; + cluster_func_output_type.getShape()[dimension_index]; if (output_shape_at_dim % output_splits != 0) { mlir::emitError( location, @@ -432,7 +433,7 @@ mlir::LogicalResult ValidateAndGetTiledExecuteOutputShape( } *tiled_logical_computation_type = mlir::RankedTensorType::get( - new_output_shape, launch_func_output_type.getElementType()); + new_output_shape, cluster_func_output_type.getElementType()); return mlir::success(); } @@ -441,34 +442,34 @@ mlir::LogicalResult ValidateAndGetTiledExecuteOutputShape( mlir::LogicalResult GetOutputTypesForLogicalDeviceComputation( const int core_id, llvm::ArrayRef output_sharding_config, - mlir::tf_device::LaunchFuncOp launch_func, + mlir::tf_device::ClusterFuncOp cluster_func, llvm::SmallVectorImpl* output_types) { - output_types->reserve(launch_func.getNumResults()); + output_types->reserve(cluster_func.getNumResults()); - for (auto result_and_index : llvm::enumerate(launch_func.getResults())) { + for (auto result_and_index : llvm::enumerate(cluster_func.getResults())) { const auto output_index = 
result_and_index.index(); const auto& output_sharding = output_sharding_config[output_index]; const auto output_sharding_type = output_sharding.type(); - const auto& launch_func_output_type = + const auto cluster_func_output_type = result_and_index.value().getType().cast(); - // If output shape of launch func is statically known and output is tiled - // sharded, then the corresponding output shape of launch func must be + // If output shape of cluster func is statically known and output is tiled + // sharded, then the corresponding output shape of cluster func must be // evenly divisible number of shardings. if (output_sharding_type == xla::OpSharding::OTHER) { mlir::Type tiled_logical_computation_type; - if (launch_func_output_type.hasRank()) { + if (cluster_func_output_type.hasRank()) { auto result = ValidateAndGetTiledExecuteOutputShape( - launch_func.getLoc(), launch_func_output_type, output_sharding, + cluster_func.getLoc(), cluster_func_output_type, output_sharding, &tiled_logical_computation_type); if (mlir::failed(result)) return mlir::failure(); } else { - tiled_logical_computation_type = launch_func_output_type; + tiled_logical_computation_type = cluster_func_output_type; } output_types->emplace_back(tiled_logical_computation_type); } else if (output_sharding_type == xla::OpSharding::REPLICATED || IsAssignedToLogicalDevice(core_id, output_sharding)) { - output_types->emplace_back(launch_func_output_type); + output_types->emplace_back(cluster_func_output_type); } } @@ -478,17 +479,17 @@ mlir::LogicalResult GetOutputTypesForLogicalDeviceComputation( void RemapOutputsFromLogicalDevices( const mlir::Location& location, llvm::ArrayRef output_sharding_config, - mlir::tf_device::LaunchFuncOp launch_func, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::tf_device::ParallelExecuteOp parallel_execute, mlir::OpBuilder* builder) { - for (auto result_and_index : llvm::enumerate(launch_func.getResults())) { + for (auto result_and_index : llvm::enumerate(cluster_func.getResults())) { const auto output_index = result_and_index.index(); - const auto& launch_func_output = result_and_index.value(); + const auto cluster_func_output = result_and_index.value(); const auto& output_sharding = output_sharding_config[output_index]; const auto output_sharding_type = output_sharding.type(); if (output_sharding_type == xla::OpSharding::OTHER) { HandleTileShardedOutputs(output_index, output_sharding, location, - launch_func_output, parallel_execute, builder); + cluster_func_output, parallel_execute, builder); continue; } @@ -497,13 +498,13 @@ void RemapOutputsFromLogicalDevices( logical_device_id = output_sharding.tile_assignment_devices(0); // For maximal sharding configuration, correctly remap outputs from - // parallel_execute region to users of the launch func. - const int region_output_index = MapLaunchOutputIndexWithRegionOutputIndex( + // parallel_execute region to users of the cluster func. 
+ const int region_output_index = MapClusterOutputIndexWithRegionOutputIndex( output_sharding_config, logical_device_id, output_index); const auto output_from_logical_device = parallel_execute.GetRegionOutputs( logical_device_id)[region_output_index]; - launch_func_output.replaceAllUsesWith(output_from_logical_device); + cluster_func_output.replaceAllUsesWith(output_from_logical_device); } } @@ -522,7 +523,7 @@ llvm::SmallVector, 4> GetMetadataArgumentMapping( const auto& sharding = arg_and_idx.value().sharding(); const int64_t idx = arg_and_idx.index(); - const auto& sharding_type = sharding.type(); + const auto sharding_type = sharding.type(); if (sharding_type == xla::OpSharding::OTHER) { for (const auto& device : sharding.tile_assignment_devices()) input_mappings[device].push_back(idx); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.h b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.h index 52a633d3111..69bc092927d 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.h @@ -32,19 +32,20 @@ namespace tensorflow { extern const char* const kInputShardingAttr; extern const char* const kOutputShardingAttr; -// Parses "input_sharding_configuration" attribute and returns a list where -// i-th element is a list of mlir::Value's which represent inputs for the -// TPU computation correponding to i-th logical device. If the attribute -// does not exist, the all inputs are placed on logical core 0. +// Parses "input_sharding_configuration" attribute and returns a list where i-th +// element is a list of mlir::Value's which represent inputs for the TPU +// computation correponding to i-th logical device. If the attribute does not +// exist, the all inputs are placed on logical core 0. mlir::LogicalResult ExtractInputsForLogicalDevices( - const int num_cores_per_replica, mlir::tf_device::LaunchFuncOp launch_func, - mlir::OpBuilder* builder, + const int num_cores_per_replica, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::OpBuilder* builder, llvm::SmallVectorImpl>* input_list); -// Extracts a list of OpSharding that represent output sharding configuration -// of `tf_device.launch`. +// Extracts a list of OpSharding that represent output sharding configuration of +// `tf_device.cluster`. mlir::LogicalResult ParseAndValidateOutputSharding( - const int num_cores_per_replica, mlir::tf_device::LaunchFuncOp launch_func, + const int num_cores_per_replica, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::SmallVector* output_sharding_list); // Retrieves output types for TPUExecute op representing execution for provided @@ -52,15 +53,15 @@ mlir::LogicalResult ParseAndValidateOutputSharding( // different outputs depending on the output sharding configuration. mlir::LogicalResult GetOutputTypesForLogicalDeviceComputation( const int core_id, llvm::ArrayRef output_sharding_config, - mlir::tf_device::LaunchFuncOp launch_func, + mlir::tf_device::ClusterFuncOp cluster_func, llvm::SmallVectorImpl* output_types); // Remaps outputs of `tf_device.parallel_execute` op that represent concurrent -// execution of the `tf_device.launch_func` with its users. +// execution of the `tf_device.cluster_func` with its users. 
void RemapOutputsFromLogicalDevices( const mlir::Location& location, llvm::ArrayRef output_sharding_config, - mlir::tf_device::LaunchFuncOp launch_func, + mlir::tf_device::ClusterFuncOp cluster_func, mlir::tf_device::ParallelExecuteOp parallel_execute, mlir::OpBuilder* builder); diff --git a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc index 62b862f5e21..2e1528e0d60 100644 --- a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc @@ -104,26 +104,24 @@ int main(int argc, char** argv) { return 1; } + std::unordered_set tags = absl::StrSplit(saved_model_tags, ','); + std::vector exported_names_vector = + absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); + absl::Span exported_names(exported_names_vector); + if (import_saved_model_object_graph) { - std::unordered_set tags = - absl::StrSplit(saved_model_tags, ','); - std::vector exported_names = - absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); mlir::MLIRContext context; auto module = tensorflow::SavedModelObjectGraphToMlirImport( - input_filename, tags, absl::Span(exported_names), - &context); + input_filename, tags, exported_names, &context); if (!module) return 1; module->print(output->os()); } else if (import_saved_model_signature_defs) { - std::unordered_set tags = - absl::StrSplit(saved_model_tags, ','); mlir::MLIRContext context; auto module = tensorflow::SavedModelSignatureDefsToMlirImport( - input_filename, tags, &context); + input_filename, tags, exported_names, &context); if (!module) return 1; module->print(output->os()); diff --git a/tensorflow/compiler/mlir/tfjs/BUILD b/tensorflow/compiler/mlir/tfjs/BUILD index 9b731d2c912..ac629ac4573 100644 --- a/tensorflow/compiler/mlir/tfjs/BUILD +++ b/tensorflow/compiler/mlir/tfjs/BUILD @@ -1,4 +1,5 @@ load("//third_party/mlir:tblgen.bzl", "gentbl") +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") package( default_visibility = ["//visibility:public"], @@ -39,7 +40,7 @@ gentbl( "ir/tfjs_ops.td", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -131,10 +132,106 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes", "//tensorflow/compiler/mlir/tensorflow:tf_graph_optimization_pass", - "//tensorflow/compiler/mlir/tensorflow:translate_lib", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:Transforms", ], ) + +cc_library( + name = "json_translate_lib", + srcs = [ + "translate/json_translate.cc", + ], + hdrs = [ + "translate/json_translate.h", + ], + deps = [ + ":tensorflow_js", + ":tensorflow_js_dialect_registration", + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", + "//tensorflow/compiler/mlir/tensorflow:export_utils", + "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status", + "@llvm-project//mlir:IR", + 
"@llvm-project//mlir:Support", + "@llvm-project//mlir:Translation", + ], + alwayslink = 1, +) + +cc_library( + name = "tf_to_tfjs_json", + srcs = ["translate/tf_to_tfjs_json.cc"], + hdrs = [ + "translate/tf_to_tfjs_json.h", + ], + deps = [ + ":json_translate_lib", + ":tfjs_optimize", + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass", + "//tensorflow/compiler/mlir/tensorflow:error_util", + "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib", + "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes", + "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", + "//tensorflow/compiler/mlir/tensorflow:translate_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], + alwayslink = 1, +) + +tf_cc_binary( + name = "json_translate", + deps = [ + ":json_translate_lib", + "@llvm-project//mlir:MlirTranslateMain", + ], +) + +filegroup( + name = "tf_tfjs_translate_main", + srcs = [ + "translate/tf_tfjs_translate.cc", + ], +) + +tf_cc_binary( + name = "tf_tfjs_translate", + srcs = [":tf_tfjs_translate_main"], + deps = [ + ":json_translate_lib", + ":tensorflow_js_passes", + ":tf_to_tfjs_json", + ":tfjs_optimize", + "//tensorflow/compiler/mlir:init_mlir", + "//tensorflow/compiler/mlir/tensorflow:translate_cl_options", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:errors", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], +) diff --git a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h index 318895de79c..545183a052b 100644 --- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h +++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h @@ -28,6 +28,7 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Interfaces/SideEffects.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project + namespace mlir { namespace tfjs { diff --git a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td index 172347bc0f5..134aa010d8c 100644 --- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td +++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.td @@ -23,7 +23,7 @@ limitations under the License. #define TFJS_DIALECT include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" //===----------------------------------------------------------------------===// // TensorFlow.js dialect definitions diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD b/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD new file mode 100644 index 00000000000..5c8d37da2f0 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/BUILD @@ -0,0 +1,23 @@ +load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests") + +licenses(["notice"]) + +glob_lit_tests( + data = [ + ":test_utilities", + ], + driver = "@llvm-project//mlir:run_lit.sh", + test_file_exts = [ + "pbtxt", + ], +) + +# Bundle together all of the test utilities that are used by tests. 
+filegroup( + name = "test_utilities", + testonly = True, + data = [ + "//tensorflow/compiler/mlir/tfjs:tf_tfjs_translate", + "@llvm-project//llvm:FileCheck", + ], +) diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt b/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt new file mode 100644 index 00000000000..f6a324fdc13 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/add.pbtxt @@ -0,0 +1,78 @@ +# RUN: tf_tfjs_translate %s -tf-input-arrays=input0,input1 -tf-input-data-types=DT_INT32,DT_INT32 -tf-input-shapes=10:10 -tf-output-arrays=Mul -o - | FileCheck %s --dump-input-on-failure +# Add two tensor<4xi32> inputs and return the result + +node { + name: "Add" + op: "Add" + input: "input0" + input: "input1" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} +node { + name: "input0" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } +} +node { + name: "input1" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } +} +node { + name: "Mul" + op: "Mul" + input: "Add" + input: "Add" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} +versions { + producer: 27 +} + +# CHECK: "name": "input0" +# CHECK-NEXT: "op": "Placeholder" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "input1", +# CHECK-NEXT: "op": "Placeholder" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Add" +# CHECK-NEXT: "op": "AddV2" +# CHECK-NEXT: "input": +# CHECK-NEXT: "input0" +# CHECK-NEXT: "input1" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Mul1" +# CHECK-NEXT: "op": "Mul" +# CHECK-NEXT: "input": +# CHECK-NEXT: "Add" +# CHECK-NEXT: "Add" +# CHECK: "type": "DT_INT32" +# CHECK: "name": "Mul" +# CHECK-NEXT: "op": "_Retval" +# CHECK-NEXT: "input": +# CHECK-NEXT: "Mul1" +# CHECK: "type": "DT_INT32" +# CHECK: "library" +# CHECK: "versions" +# CHECK: "producer": 27 + diff --git a/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt b/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt new file mode 100644 index 00000000000..810db71f5e0 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/tests/e2e/prelu.pbtxt @@ -0,0 +1,175 @@ +# RUN: tf_tfjs_translate %s -tf-input-arrays=input0 -tf-input-data-types=DT_FLOAT -tf-input-shapes=10 -tf-output-arrays=Add -tf-custom-opdefs="name: 'Prelu' input_arg: { name: 'x' type: DT_FLOAT } input_arg: { name: 'alpha' type: DT_FLOAT } output_arg: { name: 'c' type: DT_FLOAT }" -o - | FileCheck %s --dump-input-on-failure +# Add two tensor<4xi32> inputs and return the result + +node { + name: "input0" + op: "Placeholder" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 10 + } + } + } + } + experimental_debug_info { + } +} +node { + name: "alpha" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } + experimental_debug_info { + } +} +node { + name: "Relu" + op: "Relu" + input: "input0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Neg" + op: "Neg" + input: "input0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Relu1" + op: "Relu" + input: "Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "Mul" + op: "Mul" + input: "alpha" + input: "Relu1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: 
"Add" + op: "Add" + input: "Relu" + input: "Mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + } +} +node { + name: "main" + op: "_Retval" + input: "Add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +library { +} +versions { + producer: 344 +} + +# CHECK: "node": +# CHECK: "name": "input0", +# CHECK-NEXT: "op": "Placeholder", +# CHECK-NEXT: "attr": +# CHECK: "type": "DT_FLOAT" +# CHECK: "name": "Add.Relu.Neg.Relu1.Mul", +# CHECK-NEXT: "op": "Const", +# CHECK-NEXT: "attr": +# CHECK: "value": +# CHECK: "tensor": +# CHECK: "dtype": "DT_FLOAT", +# CHECK: "tensorShape": {}, +# CHECK: "floatVal": +# CHECK: -0.5 +# CHECK: "name": "Add.Relu.Neg.Relu1.Mul1", +# CHECK-NEXT: "op": "Prelu", +# CHECK-NEXT: "input": +# CHECK: "input0", +# CHECK: "Add.Relu.Neg.Relu1.Mul" +# CHECK: "attr": +# CHECK: "_output_shapes": +# CHECK: "list": +# CHECK: "shape": +# CHECK: "dim": +# CHECK: "size": "10" +# CHECK: "experimentalDebugInfo": {} +# CHECK: "name": "Add", +# CHECK-NEXT: "op": "_Retval", +# CHECK-NEXT: "input": +# CHECK: "Add.Relu.Neg.Relu1.Mul1" +# CHECK: "attr": +# CHECK: "T": +# CHECK: "type": "DT_FLOAT" +# CHECK: "library": {}, +# CHECK: "versions": +# CHECK: "producer": 344 + diff --git a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc index 631bb1ae2af..a445937570e 100644 --- a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc +++ b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ limitations under the License. #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassManager.h" // from @llvm-project #include "mlir/Transforms/Passes.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/transforms/decode_constant.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tfjs/transforms/passes.h" @@ -47,6 +46,11 @@ void AddTFToTFJSConversionPasses(mlir::OpPassManager* pm) { // Canonicalize, CSE etc. pm->addNestedPass(mlir::createCanonicalizerPass()); pm->addNestedPass(mlir::createCSEPass()); + + // raise to executor dialect in order to use GraphDef converter + pm->addNestedPass( + mlir::CreateFunctionalToExecutorDialectConversionPass()); + pm->addNestedPass(mlir::CreateBreakUpIslandsPass()); } } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc b/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc new file mode 100644 index 00000000000..7f4b8ffae09 --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/json_translate.cc @@ -0,0 +1,105 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tfjs/translate/json_translate.h" + +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Translation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/export_utils.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/status.h" + +using mlir::ModuleOp; +using mlir::TranslateFromMLIRRegistration; +using std::string; +using tensorflow::Status; +using xla::StatusOr; + +// Translates the given MLIR module in the TFJS dialect to TFJS JSON +// format. Returns false on success. +// +bool tfjs::MlirToJSONTranslateFunction(ModuleOp module, + std::string* serialized_json) { + string json_output; + // Allow TF to treat TFJS ops as TF ops. + if (!tensorflow::AddTensorFlowOpPrefix("tfjs.").ok()) { + LOG(ERROR) << "Failed to add tfjs op prefix."; + return false; + } + tensorflow::GraphExportConfig confs; + confs.export_shapes = true; + confs.export_library = true; + tensorflow::FunctionLibraryDefinition flib_def( + tensorflow::OpRegistry::Global(), tensorflow::FunctionDefLibrary()); + absl::flat_hash_set control_ret_nodes; + auto graph = absl::make_unique(flib_def); + auto status = tensorflow::ConvertMlirToGraph(module, confs, &graph, &flib_def, + &control_ret_nodes); + if (!status.ok()) { + LOG(ERROR) << "Graph export failed: " << status; + return false; + } + auto graphdef = absl::make_unique(); + graph->ToGraphDef(graphdef.get()); + + // Replace the _Arg nodes of the main function with Placeholder op. 
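+  // For illustration: an exported argument surfaces as a NodeDef such as
+  //   node { name: "input0" op: "_Arg" attr { key: "T" ... } attr { key: "index" ... } }
+  // and only its op field is rewritten to "Placeholder" below; the node name
+  // and attributes are left untouched so downstream TFJS tooling sees a
+  // regular graph input.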
+ auto nodes = graphdef->mutable_node(); + for (const auto& node : llvm::enumerate(*nodes)) { + if (node.value().op() == "_Arg") { + nodes->Mutable(node.index())->set_op("Placeholder"); + } + } + + tensorflow::protobuf::util::JsonPrintOptions json_options; + json_options.add_whitespace = true; + auto jsonStatus = tensorflow::protobuf::util::MessageToJsonString( + *graphdef, &json_output, json_options); + if (!jsonStatus.ok()) { + LOG(ERROR) << "Proto2Json failed: " << status; + return false; + } + *serialized_json = std::move(json_output); + return true; +} + +static mlir::LogicalResult MlirToJSONFileTranslateFunction( + ModuleOp module, llvm::raw_ostream& output) { + std::string serialized_json; + if (!tfjs::MlirToJSONTranslateFunction(module, &serialized_json)) + return mlir::failure(); + + output << serialized_json; + return mlir::success(); +} + +static TranslateFromMLIRRegistration MLIRToJSONFileTranslate( + "mlir-to-tfjs-json", MlirToJSONFileTranslateFunction); diff --git a/tensorflow/compiler/mlir/tfjs/translate/json_translate.h b/tensorflow/compiler/mlir/tfjs/translate/json_translate.h new file mode 100644 index 00000000000..0a931f770ad --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/json_translate.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ +#define TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ + +#include + +#include "mlir/IR/Module.h" // from @llvm-project +#include "tensorflow/core/lib/core/status.h" + +namespace tfjs { + +// Translates the given MLIR `module` into a JSON string. Returns true if +// translation fails, otherwise returns false. +bool MlirToJSONTranslateFunction(mlir::ModuleOp module, + std::string* serialized_json); +} // namespace tfjs + +#endif // TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_JSON_TRANSLATE_H_ diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc b/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc new file mode 100644 index 00000000000..e735a3c7b8c --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_tfjs_translate.cc @@ -0,0 +1,173 @@ + +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "absl/strings/str_split.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/init_mlir.h" +#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate_cl.h" +#include "tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.h" +#include "tensorflow/compiler/mlir/tfjs/transforms/passes.h" +#include "tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +using llvm::cl::opt; +using mlir::MLIRContext; +using stream_executor::port::StatusOr; + +// NOLINTNEXTLINE +opt input_file_name(llvm::cl::Positional, + llvm::cl::desc(""), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +opt import_saved_model_object_graph( + "savedmodel-objectgraph-to-mlir", + llvm::cl::desc("Import a saved model to its MLIR representation"), + llvm::cl::value_desc("dir")); + +// NOLINTNEXTLINE +opt import_saved_model_signature_defs( + "savedmodel-signaturedefs-to-mlir", + llvm::cl::desc("Import a saved model V1 to its MLIR representation"), + llvm::cl::value_desc("dir")); + +// NOLINTNEXTLINE +opt saved_model_tags( + "tf-savedmodel-tags", + llvm::cl::desc("Tags used to indicate which MetaGraphDef to import, " + "separated by ','"), + llvm::cl::init("serve")); + +// NOLINTNEXTLINE +opt saved_model_exported_names( + "tf-savedmodel-exported-names", + llvm::cl::desc("Names to export from SavedModel, separated by ','. Empty " + "(the default) means export all."), + llvm::cl::init("")); + +// NOLINTNEXTLINE +opt output_file_name("o", llvm::cl::desc(""), + llvm::cl::value_desc("filename"), + llvm::cl::init("-")); +// NOLINTNEXTLINE +opt input_mlir( + "input-mlir", + llvm::cl::desc("Take input TensorFlow model in textual MLIR instead of " + "GraphDef format"), + llvm::cl::init(false), llvm::cl::Hidden); +// NOLINTNEXTLINE +opt output_mlir( + "output-mlir", + llvm::cl::desc("Output MLIR rather than JSON for the generated TFJS model"), + llvm::cl::init(false)); + +// The following approach allows injecting opdefs in addition +// to those that are already part of the global TF registry to be linked in +// prior to importing the graph. The primary goal is for support of custom ops. +// This is not intended to be a general solution for custom ops for the future +// but mainly for supporting older models like mobilenet_ssd. More appropriate +// mechanisms, such as op hints or using functions to represent composable ops +// like https://github.com/tensorflow/community/pull/113 should be encouraged +// going forward. +// NOLINTNEXTLINE +llvm::cl::list custom_opdefs( + "tf-custom-opdefs", llvm::cl::desc("List of custom opdefs when importing " + "graphdef")); + +// Debugging flag to print function mapping in the JSON. 
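+// Example invocation converting a GraphDef to TFJS JSON (mirrors the RUN lines
+// under tensorflow/compiler/mlir/tfjs/tests/e2e/; file names are illustrative):
+//
+//   tf_tfjs_translate add.pbtxt -tf-input-arrays=input0,input1 \
+//     -tf-input-data-types=DT_INT32,DT_INT32 -tf-input-shapes=10:10 \
+//     -tf-output-arrays=Mul -o add.json
+//
+// Adding -print-function-result-mapping (defined below) also prints the
+// generated output to stdout.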
+// NOLINTNEXTLINE +static opt print_function_result_mapping( + "print-function-result-mapping", + llvm::cl::desc( + "Print the mapping of function result to json output buffer"), + llvm::cl::init(false)); + +enum TranslationStatus { kTrSuccess, kTrFailure }; + +static int PrintFunctionResultMapping(const std::string& result) { + std::cout << result << std::endl; + return kTrSuccess; +} + +int main(int argc, char** argv) { + tensorflow::InitMlir y(&argc, &argv); + + llvm::cl::ParseCommandLineOptions(argc, argv, + "TF GraphDef to TFJS JSON converter\n"); + + MLIRContext context; + llvm::SourceMgr source_mgr; + mlir::SourceMgrDiagnosticHandler sourceMgrHandler(source_mgr, &context); + + StatusOr module; + + if (import_saved_model_object_graph || import_saved_model_signature_defs) { + if (input_mlir) + module = tensorflow::errors::InvalidArgument( + "Importing saved model should not have input_mlir set"); + module = tensorflow::ImportSavedModel( + import_saved_model_object_graph, import_saved_model_signature_defs, + custom_opdefs, input_file_name, saved_model_tags, + saved_model_exported_names, &context); + } else { + module = tensorflow::LoadFromGraphdefOrMlirSource( + input_file_name, input_mlir, custom_opdefs, debug_info_file, + input_arrays, input_dtypes, input_shapes, output_arrays, + /*prune_unused_nodes=*/true, &source_mgr, &context); + } + + // If errors occur, the library call in the above already logged the error + // message. So we can just return here. + if (!module.ok()) return kTrFailure; + + mlir::PassManager pm(&context); + + tensorflow::AddTFToTFJSConversionPasses(&pm); + + std::string result; + auto status = tensorflow::ConvertTFOpsToTfjsJSON(module.ValueOrDie().get(), + output_mlir, &result, &pm); + if (!status.ok()) return kTrFailure; + + std::string error_msg; + auto output = mlir::openOutputFile(output_file_name, &error_msg); + if (output == nullptr) { + llvm::errs() << error_msg << '\n'; + return kTrFailure; + } + output->os() << result; + output->keep(); + + // Print out debugging info related to function mapping. + if (print_function_result_mapping) return PrintFunctionResultMapping(result); + return kTrSuccess; +} diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc new file mode 100644 index 00000000000..7dc9ea049ba --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.cc @@ -0,0 +1,152 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h" + +#include +#include +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" +#include "tensorflow/compiler/mlir/tfjs/translate/json_translate.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace tensorflow { + +using mlir::MLIRContext; +using mlir::ModuleOp; +using mlir::OwningModuleRef; +using stream_executor::port::StatusOr; + +namespace { +tensorflow::Status RegisterCustomOps( + const std::vector& extra_tf_opdefs) { + for (const auto& tf_opdefs_string : extra_tf_opdefs) { + tensorflow::OpDef opdef; + if (!tensorflow::protobuf::TextFormat::ParseFromString(tf_opdefs_string, + &opdef)) { + LOG(ERROR) << "OpDef parsing failed for: " << tf_opdefs_string; + return errors::InvalidArgument("fail to parse extra OpDef"); + } + // Register extra opdefs. + tensorflow::OpRegistry::Global()->Register( + [opdef](tensorflow::OpRegistrationData* op_reg_data) -> Status { + *op_reg_data = tensorflow::OpRegistrationData(opdef); + return Status::OK(); + }); + } + return Status::OK(); +} +} // namespace + +StatusOr LoadFromGraphdefOrMlirSource( + const std::string& input_filename, bool input_mlir, + const std::vector& extra_tf_opdefs, + absl::string_view debug_info_file, absl::string_view input_arrays, + absl::string_view input_dtypes, absl::string_view input_shapes, + absl::string_view output_arrays, bool prune_unused_nodes, + llvm::SourceMgr* source_mgr, MLIRContext* context) { + // Set up the input file. 
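+  // Two input flavors are handled below: a textual MLIR module, which is
+  // handed to the source manager and parsed directly, and a GraphDef, for
+  // which any extra custom OpDefs are registered before running the standard
+  // GraphDef-to-MLIR importer.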
+ std::string error_message; + auto file = mlir::openInputFile(input_filename, &error_message); + if (!file) { + llvm::errs() << error_message << "\n"; + return errors::InvalidArgument("fail to open input file"); + } + + if (input_mlir) { + source_mgr->AddNewSourceBuffer(std::move(file), llvm::SMLoc()); + return OwningModuleRef(mlir::parseSourceFile(*source_mgr, context)); + } + + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + + return tensorflow::GraphdefToMlirTranslateFunction( + file->getBuffer(), debug_info_file, input_arrays, input_dtypes, + input_shapes, output_arrays, /*control_output_arrays=*/"", + prune_unused_nodes, /*convert_legacy_fed_inputs=*/true, + /*graph_as_function=*/false, /*upgrade_legacy=*/true, + /*enable_shape_inference=*/true, context); +} + +Status ConvertTFOpsToTfjsJSON(mlir::ModuleOp module, bool export_to_mlir, + std::string* result, + mlir::PassManager* pass_manager) { + mlir::StatusScopedDiagnosticHandler statusHandler(module.getContext(), + /*propagate=*/true); + if (failed(pass_manager->run(module))) { + return statusHandler.ConsumeStatus(); + } + + if (export_to_mlir) { + llvm::raw_string_ostream os(*result); + module.print(os); + return Status::OK(); + } + + return tfjs::MlirToJSONTranslateFunction(module, result) + ? Status::OK() + : statusHandler.ConsumeStatus(); +} + +StatusOr ImportSavedModel( + bool import_saved_model, bool import_saved_model_v1, + const std::vector& extra_tf_opdefs, + const std::string& input_filename, const std::string& saved_model_tags, + const std::string& saved_model_exported_names, mlir::MLIRContext* context) { + std::unordered_set tags = absl::StrSplit(saved_model_tags, ','); + std::vector exported_names_in_vector = + absl::StrSplit(saved_model_exported_names, ',', absl::SkipEmpty()); + absl::Span exported_names(exported_names_in_vector); + if (import_saved_model) { + auto module = tensorflow::SavedModelObjectGraphToMlirImport( + input_filename, tags, absl::Span(exported_names), context); + if (!module) + return tensorflow::errors::InvalidArgument("fail to open input file"); + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + return module; + } else if (import_saved_model_v1) { + auto module = tensorflow::SavedModelSignatureDefsToMlirImport( + input_filename, tags, exported_names, context); + + if (!module) + return tensorflow::errors::InvalidArgument("fail to open input file"); + TF_RETURN_IF_ERROR(RegisterCustomOps(extra_tf_opdefs)); + return module; + } else { + return tensorflow::errors::InvalidArgument( + "Should be either saved model v1 or v2"); + } +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h new file mode 100644 index 00000000000..d68f0e7d46e --- /dev/null +++ b/tensorflow/compiler/mlir/tfjs/translate/tf_to_tfjs_json.h @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ +#define TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "llvm/Support/SourceMgr.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "tensorflow/core/platform/status.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace tensorflow { + +// Load a TF model from a GraphDef definition or a TF control flow dialect MLIR +// source into a MLIR module. If `input_mlir` is true, load from a MLIR source +// file; otherwise, load from a GraphDef. +// Setting prune_unused_nodes to true, would prune unreachable nodes if +// output_arrays is specified. +stream_executor::port::StatusOr +LoadFromGraphdefOrMlirSource( + const std::string& input_filename, bool input_mlir, + const std::vector& extra_tf_opdefs, + absl::string_view debug_info_file, absl::string_view input_arrays, + absl::string_view input_dtypes, absl::string_view input_shapes, + absl::string_view output_arrays, bool prune_unused_nodes, + llvm::SourceMgr* source_mgr, mlir::MLIRContext* context); + +// Load Saved model (either v1 or v2) into MLIR. +stream_executor::port::StatusOr ImportSavedModel( + bool import_saved_model, bool import_saved_model_v1, + const std::vector& extra_tf_opdefs, + const std::string& input_filename, const std::string& saved_model_tags, + const std::string& saved_model_exported_names, mlir::MLIRContext* context); + +// Taking a MLIR module in TF executor dialect and a set of parameters, +// applies a set of passes to convert the module to TFJS dialect and +// serializes the result to JSON string. +// If `export_to_mlir` is true, the result is exported in MLIR text format, +// otherwise exported in JSON. 
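+// Illustrative usage (a sketch mirroring tf_tfjs_translate.cc, not a required
+// calling convention):
+//
+//   mlir::PassManager pm(&context);
+//   tensorflow::AddTFToTFJSConversionPasses(&pm);
+//   std::string json;
+//   TF_RETURN_IF_ERROR(ConvertTFOpsToTfjsJSON(module, /*export_to_mlir=*/false,
+//                                             &json, &pm));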
+Status ConvertTFOpsToTfjsJSON(mlir::ModuleOp module, bool export_to_mlir, + std::string* result, + mlir::PassManager* pass_manager); +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TFJS_TRANSLATE_TF_TO_TFJS_JSON_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD new file mode 100644 index 00000000000..27a8dbd2809 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -0,0 +1,50 @@ +load("//tensorflow:tensorflow.bzl", "tf_cc_binary") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") + +licenses(["notice"]) + +cc_library( + name = "cubin_creator", + srcs = ["cubin_creator.cc"], + hdrs = ["cubin_creator.h"], + copts = if_cuda(["-DGOOGLE_CUDA=1"]), + deps = [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:support", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:TargetNVVMIR", + "@llvm-project//mlir:Transforms", + "//tensorflow/compiler/mlir/xla:hlo", + "//tensorflow/compiler/mlir/xla:lhlo", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf", + "//tensorflow/compiler/mlir/xla:xla_materialize_broadcasts", # buildcleaner: keep + "//tensorflow/compiler/mlir/xla:xla_unfuse_batch_norm", # buildcleaner: keep + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service/gpu:stream_executor_util", + "//tensorflow/compiler/xla/service/gpu:target_constants", + "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend", + "//tensorflow/compiler/xla/service/mlir_gpu:kernel_lowering", + "//tensorflow/core:cuda_libdevice_path", + "//tensorflow/core:lib", + ] + if_cuda(["//tensorflow/stream_executor/gpu:asm_compiler"]), +) + +tf_cc_binary( + name = "tf_to_cubin", + srcs = ["tf_to_cubin.cc"], + visibility = ["//tensorflow/core/kernels/cubin_headers:__pkg__"], + deps = [ + ":cubin_creator", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc new file mode 100644 index 00000000000..b1c4b1beae1 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc @@ -0,0 +1,264 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +//===- cubin_creator.cc -----------------------------------------*- C++ -*-===// +// +// This file implements the function to compile a TF kernel function to a cubin. 
+// +//===----------------------------------------------------------------------===// +#include "tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h" + +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/strings/escaping.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Target/NVVMIR.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" +#include "tensorflow/compiler/mlir/xla/transforms/passes.h" +#include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h" +#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" +#include "tensorflow/compiler/xla/service/gpu/target_constants.h" +#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h" +#include "tensorflow/core/platform/cuda_libdevice_path.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/path.h" +#if GOOGLE_CUDA +#include "tensorflow/stream_executor/gpu/asm_compiler.h" +#endif + +namespace { +using tensorflow::Status; +using xla::InternalError; +using xla::StatusOr; + +StatusOr GetLibdeviceDir( + const xla::HloModuleConfig& hlo_module_config) { + for (const std::string& cuda_root : tensorflow::CandidateCudaRoots( + hlo_module_config.debug_options().xla_gpu_cuda_data_dir())) { + std::string libdevice_dir = + tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice"); + VLOG(2) << "Looking for libdevice at " << libdevice_dir; + if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) { + VLOG(2) << "Found libdevice dir " << libdevice_dir; + return libdevice_dir; + } + } + return InternalError( + "Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice"); +} + +struct MaterializeBroadcastsPass + : public mlir::PassWrapper { + void runOnFunction() override { + mlir::ConversionTarget conversionTarget(getContext()); + mlir::OwningRewritePatternList conversionPatterns; + + // Consider the xla_hlo dialect legal for tests. + conversionTarget.addLegalDialect(); + // The conversion uses helpers from the Standard dialect. 
+ conversionTarget.addLegalDialect(); + + mlir::xla_hlo::SetupMaterializeBroadcastsLegality(&getContext(), + &conversionTarget); + mlir::xla_hlo::PopulateMaterializeBroadcastsPatterns(&getContext(), + &conversionPatterns); + + if (failed(applyPartialConversion(getFunction(), conversionTarget, + conversionPatterns))) { + return signalPassFailure(); + } + } +}; + +struct UnfuseBatchNormPass + : public mlir::PassWrapper { + void runOnFunction() override { + mlir::OwningRewritePatternList patterns; + mlir::xla_hlo::PopulateUnfuseBatchNormPatterns(&getContext(), &patterns); + mlir::applyPatternsAndFoldGreedily(getOperation(), patterns); + } +}; + +Status LowerTfOpToLhloWithDynamicShapes(mlir::ModuleOp module) { + mlir::PassManager pm(module.getContext()); + auto enable_if_vlog_is_on = [](mlir::Pass* pass, mlir::Operation* op) { + return VLOG_IS_ON(1); + }; + pm.enableIRPrinting(/*shouldPrintBeforePass=*/{}, + /*shouldPrintAfterPass=*/enable_if_vlog_is_on, + /*printModuleScope=*/false, + /*printAfterOnlyOnChange=*/false, llvm::dbgs()); + pm.addNestedPass(mlir::xla_hlo::createLegalizeTFPass(false)); + pm.addNestedPass( + absl::make_unique()); + pm.addNestedPass(absl::make_unique()); + pm.addPass(mlir::xla_hlo::createLegalizeToLhloPass()); + pm.addNestedPass(mlir::xla_lhlo::createLhloCopyRemovalPass()); + + if (failed(pm.run(module))) { + return InternalError("Lowering TF to LHLO failed."); + } + return Status::OK(); +} + +struct PropagateStaticKnowledge + : public mlir::PassWrapper> { + explicit PropagateStaticKnowledge(mlir::FunctionType type, + llvm::ArrayRef same_shape_) + : func_type(type), same_shape(same_shape_) {} + + void runOnOperation() override { + // We know due to tensorflow ABI that the offset is always 0 and that the + // innermost stride is always 1. To make this visible to the compiler, + // we insert constants into the code and replace usages accordingly. + // We do not change the signature so that we keep a somewhat stable ABI + // that is easy to undertand by tools. + mlir::LLVM::LLVMFuncOp func = getOperation(); + mlir::OpBuilder b(func.getBody()); + auto index_type = func.getArgument(3).getType(); + mlir::Value one = b.create( + func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1)); + mlir::Value zero = b.create( + func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0)); + uint32_t arg_pos = 0; + std::vector positions; + for (mlir::Type arg_type : func_type.getInputs()) { + positions.push_back(arg_pos); + func.getArgument(arg_pos + 2).replaceAllUsesWith(zero); + arg_pos += 3 + arg_type.cast().getRank() * 2; + func.getArgument(arg_pos - 1).replaceAllUsesWith(one); + } + + // If we have knowledge that some arguments have the same shape, we + // can use that here. Simply replace usages of the shape parameters within + // the function body to a single shape parameter. + if (!same_shape.empty()) { + auto first = same_shape.front(); + auto first_offset = positions.at(first); + mlir::ShapedType first_type = + func_type.getInput(first).cast(); + uint32_t rank = first_type.getRank(); + for (auto same : same_shape.drop_front(1)) { + uint32_t same_offset = positions.at(same); + auto same_type = func_type.getInput(same).cast(); + if (same_type.getRank() != rank) { + func.emitOpError() << "same shape constraints on arguments with " + "non-matching shapes: #" + << first << " and #" << same; + signalPassFailure(); + } + + for (uint32_t i = 0; i < 2 * rank; ++i) { + // Replace uses for second arg data with first arg. 
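+        // Illustrative layout note (assumes the standard memref argument
+        // expansion used above, where each shaped argument becomes
+        //   [allocated ptr, aligned ptr, offset, size_0..size_{r-1},
+        //    stride_0..stride_{r-1}]
+        // i.e. 3 + 2*rank positions): the sizes and strides of argument
+        // `same` start at `same_offset + 3`, and each is rewired to the
+        // matching position of argument `first`.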
+ auto same_arg = func.getArgument(same_offset + 3 + i); + auto first_arg = func.getArgument(first_offset + 3 + i); + same_arg.replaceAllUsesWith(first_arg); + } + } + } + } + + mlir::FunctionType func_type; + llvm::ArrayRef same_shape; +}; + +Status PropagateStaticShapeKnowledgeToKernel( + mlir::ModuleOp module, llvm::ArrayRef same_shape) { + // Grab the original signature from the single function. + auto func = *module.getBody()->op_begin(); + + mlir::PassManager pm(module.getContext()); + auto enable_if_vlog_is_on = [](mlir::Pass*, mlir::Operation*) { + return VLOG_IS_ON(1); + }; + pm.enableIRPrinting(/*shouldPrintBeforePass=*/{}, + /*shouldPrintAfterPass=*/enable_if_vlog_is_on, + /*printModuleScope=*/false, + /*printAfterOnlyOnChange=*/false, llvm::dbgs()); + auto& kernel_pm = pm.nest<::mlir::gpu::GPUModuleOp>(); + kernel_pm.addNestedPass( + absl::make_unique(func.getType(), same_shape)); + + if (failed(pm.run(module))) { + return InternalError("Static knowledge propagation failed."); + } + return Status::OK(); +} +} // namespace + +StatusOr> tensorflow::kernel_gen::GenerateCubinForTfCode( + llvm::StringRef tf_code, std::pair compute_capability, + llvm::ArrayRef tile_sizes, llvm::ArrayRef same_shape, + llvm::ArrayRef unroll_factors) { + mlir::MLIRContext context; + context.allowUnregisteredDialects(); // TODO(b/152572127) + mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context); + + TF_RETURN_IF_ERROR(LowerTfOpToLhloWithDynamicShapes(module.get())); + TF_RETURN_IF_ERROR( + xla::mlir_gpu::LowerLHLOToGPU(module.get(), tile_sizes, unroll_factors, + /*collapseParallelLoops=*/false)); + TF_RETURN_IF_ERROR(xla::mlir_gpu::LowerKernelBodiesToNVVM(module.get())); + TF_RETURN_IF_ERROR( + PropagateStaticShapeKnowledgeToKernel(module.get(), same_shape)); + + mlir::OwningModuleRef kernel_module = + xla::mlir_gpu::ExtractKernelModule(*module).ValueOrDie(); + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + if (!llvmModule) { + return InternalError("Could not translate MLIR module to NVVM"); + } + + llvmModule->setModuleIdentifier("acme"); + llvmModule->setDataLayout(xla::gpu::nvptx::kDataLayout); + + xla::HloModuleConfig config; + config.set_debug_options(xla::GetDebugOptionsFromFlags()); + + TF_ASSIGN_OR_RETURN(std::string libdevice_dir, GetLibdeviceDir(config)); + TF_ASSIGN_OR_RETURN(std::string ptx, xla::gpu::nvptx::CompileToPtx( + llvmModule.get(), compute_capability, + config, libdevice_dir)); + VLOG(1) << ptx; + +#if GOOGLE_CUDA + return tensorflow::se::CompileGpuAsm( + std::get<0>(compute_capability), std::get<1>(compute_capability), + ptx.c_str(), xla::gpu::PtxOptsFromConfig(config)); +#else + return InternalError( + "GOOGLE_CUDA not defined. Did you specify --config=cuda ?"); +#endif +} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h new file mode 100644 index 00000000000..47626ba9d0d --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +//===- cubin_creator.h ------------------------------------------*- C++ -*-===// +// +// This file declares the function to compile a TF kernel function to a cubin. +// +//===----------------------------------------------------------------------===// +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ + +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace tensorflow { +namespace kernel_gen { +xla::StatusOr> GenerateCubinForTfCode( + llvm::StringRef tf_code, + std::pair compute_capability = {7, 5}, + llvm::ArrayRef tile_sizes = {16, 64}, + llvm::ArrayRef same_shape = {}, + llvm::ArrayRef unroll_factors = {}); +} // namespace kernel_gen +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_CUBIN_CREATOR_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc new file mode 100644 index 00000000000..8edc567e777 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc @@ -0,0 +1,118 @@ +// Copyright 2020 The TensorFlow Runtime Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//===- tf_to_cubin.cc -------------------------------------------*- C++ -*-===// +// +// This file implements the entry point to compile a tf op to a cubin file. +// +//===----------------------------------------------------------------------===// +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace { +bool ParseStringList(std::string string_list, std::vector* result) { + result->clear(); + uint32_t item; + auto items = absl::StrSplit(string_list, ','); + for (const auto& item_str : items) { + if (!absl::SimpleAtoi(item_str, &item)) { + LOG(ERROR) << "Expected token " << item_str << " to be an integer"; + return false; + } + result->push_back(item); + } + return true; +} +} // namespace + +int main(int argc, char** argv) { + std::string output_file = "foo.bin"; + int32_t architecture = 50; + std::vector tile_sizes; + std::vector unroll_factors; + std::vector same_shape; + + auto parse_tile_sizes = [&tile_sizes](std::string tile_sizes_str) { + if (!ParseStringList(tile_sizes_str, &tile_sizes)) { + return false; + } + // Initialize with the default. 
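+    // The flag value is a comma-separated list of integers, e.g.
+    // --tile_sizes=16,64 (the default advertised in the flag help below).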
+ if (tile_sizes.empty()) { + tile_sizes.push_back(16); + tile_sizes.push_back(64); + } + return true; + }; + + auto parse_unroll_factors = + [&unroll_factors](std::string unroll_factors_str) { + return ParseStringList(unroll_factors_str, &unroll_factors); + }; + + auto parse_same_shape = [&same_shape](std::string same_shape_str) { + return ParseStringList(same_shape_str, &same_shape); + }; + + std::vector flag_list = { + tensorflow::Flag("output", &output_file, "output file"), + tensorflow::Flag("arch", &architecture, + "target architecture (e.g. 50 for sm_50)"), + tensorflow::Flag("tile_sizes", parse_tile_sizes, "16,64", + "tile sizes to use"), + tensorflow::Flag("unroll_factors", parse_unroll_factors, "", + "factors to unroll by, separated by commas"), + tensorflow::Flag("same_shape", parse_same_shape, "", + "arguments with same shape, separated by commas"), + }; + bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + tensorflow::port::InitMain("usage", &argc, &argv); + if (!parse_ok) { + return 1; + } + + std::pair compute_capability(architecture / 10, + architecture % 10); + + auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode( + argv[1], compute_capability, tile_sizes, same_shape, unroll_factors); + + if (!cubin.ok()) { + LOG(ERROR) << cubin.status(); + return 1; + } + + std::vector cubin_data = cubin.ConsumeValueOrDie(); + + auto status = tensorflow::WriteStringToFile( + tensorflow::Env::Default(), output_file, + absl::string_view{reinterpret_cast(cubin_data.data()), + cubin_data.size()}); + + if (!status.ok()) { + LOG(ERROR) << status; + return 1; + } + + return 0; +} diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 598383d81ec..12334e463fa 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -23,7 +23,6 @@ package_group( "//tensorflow/compiler/xla/...", "//third_party/iree/...", "//third_party/mlir_edge/...", - "//third_party/tf_runtime/tools/tf_kernel_gen/...", ], ) @@ -39,7 +38,7 @@ filegroup( "ir/lhlo_ops.td", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", ], ) @@ -133,6 +132,7 @@ cc_library( "transforms/legalize_tf_control_flow.cc", ], deps = [ + ":chlo_legalize_to_hlo", ":convert_op_folder", ":hlo", "//tensorflow/compiler/mlir/tensorflow", @@ -165,6 +165,7 @@ cc_library( ":mlir_hlo_builder", "//tensorflow/compiler/mlir:op_or_arg_name_mapper", "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:convert_type", "//tensorflow/compiler/mlir/tensorflow:export_tf_dialect_op", "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib", @@ -186,6 +187,7 @@ cc_library( "@llvm-project//llvm:support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", ], alwayslink = 1, @@ -239,8 +241,8 @@ cc_library( "@llvm-project//llvm:support", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", - "@llvm-project//mlir:LoopOps", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", ], @@ -277,8 +279,8 @@ cc_library( "@llvm-project//mlir:GPUDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", - "@llvm-project//mlir:LoopOps", "@llvm-project//mlir:Pass", + 
"@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", ], @@ -397,9 +399,8 @@ cc_library( cc_library( name = "xla_hlo_to_lhlo_with_xla", - srcs = [ - "transforms/xla_hlo_to_lhlo_with_xla.cc", - ], + srcs = ["transforms/xla_hlo_to_lhlo_with_xla.cc"], + hdrs = ["transforms/xla_hlo_to_lhlo_with_xla.h"], deps = [ ":hlo", ":hlo_utils", @@ -588,6 +589,7 @@ cc_library( "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:shape_inference", @@ -717,6 +719,7 @@ cc_library( "//tensorflow/compiler/xla/client/lib:slicing", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/stream_executor/lib", "@llvm-project//llvm:support", @@ -820,7 +823,7 @@ genrule( name = "operator_writer_inc", srcs = [ "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", - "@llvm-project//mlir:include/mlir/Interfaces/SideEffects.td", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", "@llvm-project//mlir:include/mlir/IR/OpBase.td", ":ir/hlo_ops.td", ":ir/hlo_ops_base.td", diff --git a/tensorflow/compiler/mlir/xla/attribute_importer.cc b/tensorflow/compiler/mlir/xla/attribute_importer.cc index 2d17127b075..201ec0d053f 100644 --- a/tensorflow/compiler/mlir/xla/attribute_importer.cc +++ b/tensorflow/compiler/mlir/xla/attribute_importer.cc @@ -117,7 +117,7 @@ mlir::xla_hlo::ConvDimensionNumbers ConvertConvDimensionNumbers( builder->getI64IntegerAttr(dnums.kernel_output_feature_dimension()), Convert(kernel_spatial_dims, builder), builder->getI64IntegerAttr(dnums.output_batch_dimension()), - builder->getI64IntegerAttr(dnums.kernel_output_feature_dimension()), + builder->getI64IntegerAttr(dnums.output_feature_dimension()), Convert(output_spatial_dims, builder), builder->getContext()); } diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index c685cc296fd..dc801f64ede 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -139,6 +139,10 @@ StatusOr CreateDenseElementsAttrFromLiteral( return CreateDenseAttrFromLiteral(type, literal); case PrimitiveType::U64: return CreateDenseAttrFromLiteral(type, literal); + case PrimitiveType::C64: + return CreateDenseAttrFromLiteral(type, literal); + case PrimitiveType::C128: + return CreateDenseAttrFromLiteral(type, literal); default: return tensorflow::errors::Internal( absl::StrCat("Unsupported type: ", PrimitiveType_Name(element_type))); diff --git a/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc index bc6842a617e..5322668aa2e 100644 --- a/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/chlo_ops.cc @@ -97,16 +97,12 @@ static Type GetBroadcastType(Type x, Type y, Type element_type, LogicalResult InferBroadcastBinaryOpReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, Type element_type, + DictionaryAttr attributes, Type element_type, SmallVectorImpl& inferedReturnShapes) { // Find broadcast_dimensions. 
- DenseIntElementsAttr broadcast_dimensions; - for (auto attr : attributes) { - if (attr.first == "broadcast_dimensions") { - broadcast_dimensions = attr.second.dyn_cast(); - break; - } - } + DenseIntElementsAttr broadcast_dimensions = + attributes.get("broadcast_dimensions") + .dyn_cast_or_null(); ShapedType lhs_type = operands[0].getType().dyn_cast(); ShapedType rhs_type = operands[1].getType().dyn_cast(); @@ -168,7 +164,7 @@ LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes( LogicalResult BroadcastComplexOp::inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { ShapedType lhs_type = operands[0].getType().dyn_cast(); if (!lhs_type) { @@ -191,7 +187,7 @@ LogicalResult BroadcastComplexOp::reifyReturnTypeShapes( LogicalResult BroadcastCompareOp::inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { Type element_type = IntegerType::get(1, context); return InferBroadcastBinaryOpReturnTypeComponents(context, location, operands, @@ -211,7 +207,7 @@ LogicalResult BroadcastCompareOp::reifyReturnTypeShapes( #define BROADCAST_INFER_SHAPE_TYPE_OP_DEFS(Op) \ LogicalResult Op::inferReturnTypeComponents( \ MLIRContext* context, Optional location, ValueRange operands, \ - ArrayRef attributes, RegionRange regions, \ + DictionaryAttr attributes, RegionRange regions, \ SmallVectorImpl& inferedReturnShapes) { \ return InferBroadcastBinaryOpReturnTypeComponents( \ context, location, operands, attributes, /*element_type=*/nullptr, \ diff --git a/tensorflow/compiler/mlir/xla/ir/chlo_ops.td b/tensorflow/compiler/mlir/xla/ir/chlo_ops.td index a244985c9b5..f9672c1a95a 100644 --- a/tensorflow/compiler/mlir/xla/ir/chlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/chlo_ops.td @@ -31,7 +31,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" def HLOClient_Dialect : Dialect { diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc index c9742ad5337..68eafb8b33e 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include #include +#include #include "absl/container/flat_hash_set.h" #include "llvm/ADT/APFloat.h" @@ -842,6 +843,51 @@ void ConcatenateOp::getCanonicalizationPatterns( results.insert(context); } +template +static Attribute foldConcatenateHelper(ConcatenateOp* op, + ArrayRef operands) { + auto axis = op->dimension().getLimitedValue(); + auto type = op->getType().cast(); + + SmallVector values; + auto shape = type.getShape(); + + size_t top_size = 1; + for (int i = 0; i < axis; i++) { + top_size = top_size * shape[i]; + } + + for (size_t i = 0; i < top_size; i++) { + for (auto operand : operands) { + DenseElementsAttr attr = operand.cast(); + size_t bottom_size = attr.getNumElements() / top_size; + auto iter = attr.getValues().begin() + i * bottom_size; + values.append(iter, iter + bottom_size); + } + } + + return DenseElementsAttr::get(type, values); +} + +static Attribute foldConcatenate(ConcatenateOp* op, + ArrayRef operands) { + for (auto operand : operands) { + if (!operand) return {}; + } + + auto type = op->getResult().getType().cast(); + auto etype = type.getElementType(); + if (etype.isa()) { + return foldConcatenateHelper(op, operands); + } + + if (etype.isa()) { + return foldConcatenateHelper(op, operands); + } + + return {}; +} + OpFoldResult ConcatenateOp::fold(ArrayRef operands) { if (getNumOperands() == 1) return getOperand(0); @@ -849,6 +895,10 @@ OpFoldResult ConcatenateOp::fold(ArrayRef operands) { if (!type.hasStaticShape()) return {}; auto axis = dimension().getLimitedValue(); + if (auto attr = foldConcatenate(this, operands)) { + return attr; + } + llvm::SmallVector new_operands; for (auto operand : getOperands()) { auto ty = operand.getType().cast(); @@ -1120,9 +1170,22 @@ OpFoldResult CopyOp::fold(ArrayRef operands) { return getOperand(); } //===----------------------------------------------------------------------===// OpFoldResult ReverseOp::fold(ArrayRef operands) { + auto input = operand(); + // No dimensions to reverse. - if (dimensions().getNumElements() == 0) return operand(); - return nullptr; + if (dimensions().getNumElements() == 0) return input; + + llvm::SmallVector new_dims; + new_dims.reserve(dimensions().getNumElements()); + + auto shaped_type = input.getType().cast(); + for (auto dim : dimensions().getValues()) { + if (shaped_type.getDimSize(dim.getLimitedValue()) != 1) { + return nullptr; + } + } + + return input; } //===----------------------------------------------------------------------===// @@ -1190,7 +1253,7 @@ static LogicalResult Verify(SelectOp op) { // the return type based on operand type. LogicalResult SelectOp::inferReturnTypes( MLIRContext*, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferredReturnTypes) { auto x_type = operands[1].getType(); auto y_type = operands[2].getType(); @@ -1412,6 +1475,53 @@ BINARY_BUILDER(XorOp); #undef BINARY_BUILDER +template +static Attribute BinaryFolder(Op* op, ArrayRef attrs) { + if (!attrs[0] || !attrs[1]) return {}; + if (op->broadcast_dimensions().hasValue()) return {}; + + DenseElementsAttr lhs = attrs[0].dyn_cast(); + DenseElementsAttr rhs = attrs[1].dyn_cast(); + if (!lhs || !rhs) return {}; + + ShapedType type = op->getType().template cast(); + if (!type.hasStaticShape()) { + return {}; + } + + Type etype = type.getElementType(); + + // Evaluate for integer values. 
+ if (!etype.isa()) { + return {}; + } + + SmallVector values; + values.reserve(lhs.getNumElements()); + for (const auto zip : + llvm::zip(lhs.getValues(), rhs.getValues())) { + values.push_back(Convert()(std::get<0>(zip), std::get<1>(zip))); + } + + return DenseElementsAttr::get(type, values); +} + +#define BINARY_FOLDER(Op, Func) \ + OpFoldResult Op::fold(ArrayRef attrs) { \ + if (getElementTypeOrSelf(getType()).isa()) \ + return BinaryFolder>(this, attrs); \ + if (getElementTypeOrSelf(getType()).isa()) \ + return BinaryFolder>(this, attrs); \ + return {}; \ + } + +BINARY_FOLDER(AddOp, std::plus); +BINARY_FOLDER(SubOp, std::minus); +BINARY_FOLDER(MulOp, std::multiplies); + +#undef BINARY_FOLDER + //===----------------------------------------------------------------------===// // SliceOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index 16c9a7b4f05..f78ac7624d2 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -23,7 +23,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" include "tensorflow/compiler/mlir/xla/ir/hlo_utils.td" @@ -95,6 +95,7 @@ def HLO_CreateTokenOp : HLO_Op<"create_token", [NoSideEffect]> { // XLA unary elementwise op definitions. //===----------------------------------------------------------------------===// // See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions + class HLO_UnaryElementwiseOp traits, Type TensorType>: HLO_Op { @@ -103,8 +104,7 @@ class HLO_UnaryElementwiseOp traits, let extraClassDeclaration = [{ static LogicalResult inferReturnTypeComponents( MLIRContext* context, Optional location, - ValueRange operands, ArrayRef attributes, - RegionRange regions, + ValueRange operands, DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { return failure(); } @@ -161,6 +161,16 @@ def HLO_Expm1Op: HLO_UnaryElementwiseOp<"exponential_minus_one", def HLO_FloorOp: HLO_UnaryElementwiseOp<"floor", [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_FloorOp; +def HLO_ImagOp: HLO_Op< + "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value val">]; + + let arguments = (ins HLO_ComplexTensor); + let results = (outs HLO_FpTensor); + let hasFolder = 1; +} + def HLO_IsFiniteOp: HLO_UnaryElementwiseOp<"is_finite", [NoSideEffect, SameOperandsAndResultShape], HLO_Tensor>, BASE_HLO_IsFiniteOp { @@ -188,6 +198,16 @@ def HLO_PopulationCountOp: HLO_UnaryElementwiseOp<"popcnt", [NoSideEffect, SameOperandsAndResultType], HLO_IntTensor>, BASE_HLO_PopulationCountOp; +def HLO_RealOp: HLO_Op< + "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value val">]; + + let arguments = (ins HLO_ComplexTensor); + let results = (outs HLO_FpTensor); + let hasFolder = 1; +} + def HLO_RoundOp: HLO_UnaryElementwiseOp<"round_nearest_afz", [NoSideEffect, SameOperandsAndResultType], HLO_FpTensor>, BASE_HLO_RoundOp; @@ -209,50 +229,14 @@ def HLO_SqrtOp: HLO_UnaryElementwiseOp<"sqrt", BASE_HLO_SqrtOp; def HLO_TanhOp: 
HLO_UnaryElementwiseOp<"tanh", - [ResultsAreFloatLike, NoSideEffect, SameOperandsAndResultType], + [NoSideEffect, SameOperandsAndResultType], HLO_FpOrComplexTensor>, BASE_HLO_TanhOp; -//===----------------------------------------------------------------------===// -// XLA complex unary elementwise op definitions. -//===----------------------------------------------------------------------===// -// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions - -def HLO_ComplexOp: HLO_Op<"complex", - [NoSideEffect, SameOperandsElementType, SameOperandsAndResultShape]>, - BASE_HLO_ComplexOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value lhs, Value rhs">]; - - let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs); - let results = (outs HLO_ComplexTensor); - let hasFolder = 1; -} - -def HLO_ImagOp: HLO_Op< - "imag", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_ImagOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value val">]; - - let arguments = (ins HLO_ComplexTensor); - let results = (outs HLO_FpTensor); - let hasFolder = 1; -} - -def HLO_RealOp: HLO_Op< - "real", [NoSideEffect, SameOperandsAndResultShape]>, BASE_HLO_RealOp { - let builders = [OpBuilder< - "OpBuilder &, OperationState &tblgen_state, Value val">]; - - let arguments = (ins HLO_ComplexTensor); - let results = (outs HLO_FpTensor); - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // XLA binary elementwise op definitions. //===----------------------------------------------------------------------===// - // See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations + class HLO_BinaryElementwiseOp traits> : HLO_Op { let arguments = (ins @@ -269,7 +253,7 @@ class HLO_BinaryElementwiseOp traits> : let extraClassDeclaration = [{ static LogicalResult inferReturnTypeComponents( MLIRContext* context, Optional location, ValueRange operands, - ArrayRef attributes, RegionRange regions, + DictionaryAttr attributes, RegionRange regions, SmallVectorImpl& inferedReturnShapes) { return failure(); } @@ -286,22 +270,40 @@ class HLO_BinaryElementwiseOp traits> : } def HLO_AddOp : HLO_BinaryElementwiseOp<"add", - [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_AddOp; + [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_AddOp { + let hasFolder = 1; +} def HLO_Atan2Op : HLO_BinaryElementwiseOp<"atan2", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_Atan2Op; +def HLO_ComplexOp: HLO_Op<"complex", + [NoSideEffect, SameOperandsElementType, SameOperandsAndResultShape]>, + BASE_HLO_ComplexOp { + let builders = [OpBuilder< + "OpBuilder &, OperationState &tblgen_state, Value lhs, Value rhs">]; + + let arguments = (ins HLO_FpTensor:$lhs, HLO_FpTensor:$rhs); + let results = (outs HLO_ComplexTensor); + let hasFolder = 1; +} + def HLO_DivOp : HLO_BinaryElementwiseOp<"divide", - [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_DivOp; + [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_DivOp { +} def HLO_MaxOp : HLO_BinaryElementwiseOp<"maximum", - [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_MaxOp; + [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_MaxOp { +} def HLO_MinOp : HLO_BinaryElementwiseOp<"minimum", - [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_MinOp; + [Commutative, NoSideEffect, 
SameOperandsAndResultElementType]>, BASE_HLO_MinOp { +} def HLO_MulOp : HLO_BinaryElementwiseOp<"multiply", - [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_MulOp; + [Commutative, NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_MulOp { + let hasFolder = 1; +} def HLO_PowOp : HLO_BinaryElementwiseOp<"power", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_PowOp; @@ -319,7 +321,9 @@ def HLO_ShiftRightLogicalOp : HLO_BinaryElementwiseOp<"shift_right_logical", [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_ShiftRightLogicalOp; def HLO_SubOp : HLO_BinaryElementwiseOp<"subtract", - [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_SubOp; + [NoSideEffect, SameOperandsAndResultElementType]>, BASE_HLO_SubOp { + let hasFolder = 1; +} //===----------------------------------------------------------------------===// // XLA binary elementwise op definitions. diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td index c087ffd1f40..b5de675f13f 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td @@ -150,15 +150,6 @@ class BASE_HLO_ClzOp { }]; } -class BASE_HLO_ComplexOp { - string summary = "Complex operator"; - - string description = [{ - Performs element-wise conversion of a pair of real and imaginary values to - a complex value. - }]; -} - class BASE_HLO_ConvertOp { string summary = "Convert operator"; @@ -400,6 +391,15 @@ class BASE_HLO_AddOp { }]; } +class BASE_HLO_ComplexOp { + string summary = "Complex operator"; + + string description = [{ + Performs element-wise conversion of a pair of real and imaginary values to + a complex value. + }]; +} + class BASE_HLO_DivOp { string summary = "Division operator"; diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td index 6fcb2582002..db75bbd1f67 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td @@ -19,7 +19,7 @@ limitations under the License. #define LHLO_OPS include "mlir/IR/OpBase.td" -include "mlir/Interfaces/SideEffects.td" +include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" def LHLO_Dialect : Dialect { @@ -92,39 +92,30 @@ def LHLO_CosOp: LHLO_UnaryElementwiseOp<"cosine">, BASE_HLO_CosOp; def LHLO_ExpOp: LHLO_UnaryElementwiseOp<"exponential">, BASE_HLO_ExpOp; +def LHLO_ImagOp: LHLO_Op<"imag", [SameOperandsShape]>, BASE_HLO_ImagOp { + let arguments = (ins Arg:$input, + Arg:$output); +} + def LHLO_LogOp: LHLO_UnaryElementwiseOp<"log">, BASE_HLO_LogOp; def LHLO_NegOp: LHLO_UnaryElementwiseOp<"negate">, BASE_HLO_NegOp; +def LHLO_RealOp: LHLO_Op<"real", [SameOperandsShape]>, BASE_HLO_RealOp { + let arguments = (ins Arg:$input, + Arg:$output); +} + def LHLO_RsqrtOp: LHLO_UnaryElementwiseOp<"rsqrt">, BASE_HLO_RsqrtOp; def LHLO_SqrtOp: LHLO_UnaryElementwiseOp<"sqrt">, BASE_HLO_SqrtOp; def LHLO_SignOp: LHLO_UnaryElementwiseOp<"sign">, BASE_HLO_SignOp; +def LHLO_SinOp: LHLO_UnaryElementwiseOp<"sine">, BASE_HLO_SinOp; + def LHLO_TanhOp: LHLO_UnaryElementwiseOp<"tanh">, BASE_HLO_TanhOp; -//===----------------------------------------------------------------------===// -// XLA complex unary elementwise op definitions. 
-//===----------------------------------------------------------------------===// -// See https://www.tensorflow.org/xla/operation_semantics#element-wise_unary_functions - -def LHLO_ComplexOp: LHLO_Op<"complex", [SameOperandsShape]>, BASE_HLO_ComplexOp { - let arguments = (ins Arg:$lhs, - Arg:$rhs, - Arg:$output); -} - -def LHLO_ImagOp: LHLO_Op<"imag", [SameOperandsShape]>, BASE_HLO_ImagOp { - let arguments = (ins Arg:$input, - Arg:$output); -} - -def LHLO_RealOp: LHLO_Op<"real", [SameOperandsShape]>, BASE_HLO_RealOp { - let arguments = (ins Arg:$input, - Arg:$output); -} - //===----------------------------------------------------------------------===// // XLA binary elementwise op definitions. //===----------------------------------------------------------------------===// @@ -142,6 +133,12 @@ class LHLO_BinaryElementwiseOp traits> : def LHLO_AddOp : LHLO_BinaryElementwiseOp<"add", []>, BASE_HLO_AddOp; +def LHLO_ComplexOp: LHLO_Op<"complex", [SameOperandsShape]>, BASE_HLO_ComplexOp { + let arguments = (ins Arg:$lhs, + Arg:$rhs, + Arg:$output); +} + def LHLO_DivOp : LHLO_BinaryElementwiseOp<"divide", []>, BASE_HLO_DivOp; def LHLO_MaxOp : LHLO_BinaryElementwiseOp<"maximum", []>, BASE_HLO_MaxOp; diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc index cfa8c1b6bfc..461c357e509 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/compiler/xla/util.h" namespace xla { @@ -55,6 +56,20 @@ static mlir::DenseIntElementsAttr GetI64ElementsAttr( return mlir::DenseIntElementsAttr::get(ty, mlir_values); } +static mlir::DenseIntElementsAttr ConvertPadding( + absl::Span> padding, + mlir::Builder* builder) { + llvm::SmallVector elements; + elements.reserve(padding.size() * 2); + for (const auto& vals : padding) { + elements.push_back(vals.first); + elements.push_back(vals.second); + } + auto ty = mlir::RankedTensorType::get( + {static_cast(padding.size()), 2}, builder->getIntegerType(64)); + return mlir::DenseIntElementsAttr::get(ty, elements); +} + MlirHloBuilder::~MlirHloBuilder() = default; StatusOr MlirHloBuilder::MakeXlaOp(mlir::Value val) { @@ -78,6 +93,31 @@ XlaOp MlirHloBuilder::ConstantLiteral(const LiteralSlice& literal) { }); } +StatusOr MlirHloBuilder::ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) { + TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( + shape, builder_)); + mlir::ArrayAttr config_attr; + if (precision_config) + config_attr = ConvertPrecisionConfig(precision_config, &builder_); + auto op = builder_.create( + loc_, ty, GetValue(lhs), GetValue(rhs), + GetI64ElementsAttr(window_strides, &builder_), + ConvertPadding(padding, &builder_), + GetI64ElementsAttr(lhs_dilation, &builder_), + GetI64ElementsAttr(rhs_dilation, &builder_), + ConvertConvDimensionNumbers(dimension_numbers, &builder_), + builder_.getI64IntegerAttr(feature_group_count), + builder_.getI64IntegerAttr(batch_group_count), config_attr); + 
return MakeXlaOp(op); +} + StatusOr MlirHloBuilder::TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) { TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( @@ -100,6 +140,29 @@ StatusOr MlirHloBuilder::GatherInternal( return MakeXlaOp(op); } +StatusOr MlirHloBuilder::RngOpInternal( + RandomDistribution distribution, absl::Span parameters, + const Shape& shape) { + // TODO(hinsu): Introduce RngOp in the HLO dialect in MLIR and then RngUniform + // and RngNormal can be mapped to the new op. + std::string op_name; + if (distribution == xla::RandomDistribution::RNG_UNIFORM) { + op_name = "xla_hlo.rng_uniform"; + } else { + TF_RET_CHECK(distribution == xla::RandomDistribution::RNG_NORMAL) + << "Unexpected distribution: " << distribution; + op_name = "xla_hlo.rng_normal"; + } + + if (shape.is_dynamic()) + return Unimplemented("RngOp with dynamic dims not supported"); + llvm::SmallVector operands; + operands.append(parameters.begin(), parameters.end()); + operands.push_back( + ConstantLiteral(LiteralUtil::CreateR1(shape.dimensions()))); + return CreateOp(op_name, shape, operands); +} + StatusOr MlirHloBuilder::ReshapeInternal(const Shape& shape, XlaOp operand, int64 inferred_dimension) { @@ -154,15 +217,14 @@ StatusOr MlirHloBuilder::Compare(const Shape& shape, XlaOp lhs, XlaOp MlirHloBuilder::BinaryOpNoBroadcast(HloOpcode binop, const Shape& shape, XlaOp lhs, XlaOp rhs) { return ReportErrorOrReturn([&]() -> StatusOr { - return CreateOp(GetMlirOpName(binop), shape, {lhs, rhs}, /*attributes=*/{}); + return CreateOp(GetMlirOpName(binop), shape, {lhs, rhs}); }); } StatusOr MlirHloBuilder::AddOpWithShape( HloOpcode opcode, const Shape& shape, absl::Span operands) { return CreateOp(GetMlirOpName(opcode), shape, - llvm::makeArrayRef(operands.data(), operands.size()), - /*attributes=*/{}); + llvm::makeArrayRef(operands.data(), operands.size())); } XlaOp MlirHloBuilder::CreateToken() { @@ -220,6 +282,28 @@ StatusOr MlirHloBuilder::SliceInternal( GetI64ElementsAttr(strides, &builder_))); } +StatusOr MlirHloBuilder::DynamicSliceInternal( + const Shape& shape, XlaOp operand, absl::Span start_indices, + absl::Span slice_sizes) { + TF_ASSIGN_OR_RETURN( + mlir::Type result_ty, + ConvertShapeToType(shape, builder_)); + return MakeXlaOp(builder_.create( + loc_, result_ty, GetValue(operand), GetValues(start_indices), + GetI64ElementsAttr(slice_sizes, &builder_))); +} + +StatusOr MlirHloBuilder::DynamicUpdateSliceInternal( + const Shape& shape, XlaOp operand, XlaOp update, + absl::Span start_indices) { + TF_ASSIGN_OR_RETURN( + mlir::Type result_ty, + ConvertShapeToType(shape, builder_)); + return MakeXlaOp(builder_.create( + loc_, result_ty, GetValue(operand), GetValue(update), + GetValues(start_indices))); +} + StatusOr MlirHloBuilder::PadInternal( const Shape& shape, XlaOp operand, XlaOp padding_value, const PaddingConfig& padding_config) { diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h index c0ef645a731..fc5baaee44d 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h @@ -101,9 +101,25 @@ class MlirHloBuilder : public XlaBuilder { // Returns the shape of the given op. StatusOr GetShapePtr(XlaOp op) const override; + // Creates the given op at the current location. + template + OpTy create(Args&&... 
args) { + return builder_.create(loc_, std::forward(args)...); + } + private: XlaOp ConstantLiteral(const LiteralSlice& literal) override; + StatusOr ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, + absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) override; + StatusOr TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) override; @@ -113,6 +129,10 @@ class MlirHloBuilder : public XlaBuilder { const GatherDimensionNumbers& dimension_numbers, absl::Span slice_sizes, bool indices_are_sorted) override; + StatusOr RngOpInternal(RandomDistribution distribution, + absl::Span parameters, + const Shape& shape) override; + StatusOr ReshapeInternal(const Shape& shape, XlaOp operand, int64 inferred_dimension) override; @@ -155,6 +175,14 @@ class MlirHloBuilder : public XlaBuilder { absl::Span limit_indices, absl::Span strides) override; + StatusOr DynamicSliceInternal( + const Shape& shape, XlaOp operand, absl::Span start_indices, + absl::Span slice_sizes) override; + + StatusOr DynamicUpdateSliceInternal( + const Shape& shape, XlaOp operand, XlaOp update, + absl::Span start_indices) override; + StatusOr PadInternal(const Shape& shape, XlaOp operand, XlaOp padding_value, const PaddingConfig& padding_config) override; @@ -163,9 +191,10 @@ class MlirHloBuilder : public XlaBuilder { absl::Span elements) override; // Creates HLO dialect op and returns the result as an XlaOp. - StatusOr CreateOp(const std::string& op_name, const Shape& shape, - llvm::ArrayRef operands, - llvm::ArrayRef attributes); + StatusOr CreateOp( + const std::string& op_name, const Shape& shape, + llvm::ArrayRef operands, + llvm::ArrayRef attributes = {}); mlir::OpBuilder builder_; mlir::Location loc_; diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index d92e3d25343..228a26b5abd 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -56,6 +56,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/stream_executor/lib/statusor.h" using ::stream_executor::port::StatusOr; @@ -907,6 +908,10 @@ namespace mlir { namespace { StatusOr CreateLiteralFromAttr(ElementsAttr attr) { + if (attr.isa()) + return tensorflow::errors::Unimplemented( + "Opaque elements attr not supported"); + xla::Shape shape = xla::TypeToShape(attr.getType()); #define ELEMENTS_ATTR_TO_LITERAL(xla_type, cpp_type) \ @@ -928,6 +933,8 @@ StatusOr CreateLiteralFromAttr(ElementsAttr attr) { ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U16, uint16) ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U32, uint32) ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::U64, uint64) + ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::C64, std::complex) + ELEMENTS_ATTR_TO_LITERAL(xla::PrimitiveType::C128, std::complex) case xla::PrimitiveType::F16: { llvm::SmallVector values; values.reserve(attr.getNumElements()); @@ -979,10 +986,26 @@ LogicalResult ConvertToHloModule::Lower( return LowerFunctionCall(&call_op, builder, &value_map); } + if (auto op = dyn_cast(inst)) { + Value operand = op.getOperand(); + auto ty = operand.getType().dyn_cast(); + // If this was a cast from a static shaped tensors, then it is a noop for + // export to HLO and we can use the operand. + if (!ty || !ty.hasStaticShape()) { + inst->emitOpError() + << "requires static shaped operand for HLO translation"; + return failure(); + } + + value_map[op.getResult()] = value_map[operand]; + return success(); + } + // TODO(jpienaar): This doesn't support layouts yet. if (matchPattern(inst, m_Constant(&const_attr))) { auto literal_or = CreateLiteralFromAttr(const_attr); - if (!literal_or.ok()) return inst->emitError("unsupported elemental type"); + if (!literal_or.ok()) + return inst->emitError(literal_or.status().ToString()); value_map[inst->getResult(0)] = xla::ConstantLiteral(builder, literal_or.ValueOrDie()); return success(); diff --git a/tensorflow/compiler/mlir/xla/tests/canonicalize.mlir b/tensorflow/compiler/mlir/xla/tests/canonicalize.mlir index 5f28693c49d..30255586002 100644 --- a/tensorflow/compiler/mlir/xla/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/xla/tests/canonicalize.mlir @@ -1,5 +1,50 @@ // RUN: xla-opt %s -pass-pipeline='func(canonicalize)' | FileCheck %s --dump-input-on-failure +// CHECK-LABEL: add_fold +func @add_fold() -> tensor<4xi64> { + %0 = xla_hlo.constant dense<[1, 2, 3, 4]> : tensor<4xi64> + %1 = xla_hlo.constant dense<[5, 6, 7, 8]> : tensor<4xi64> + // CHECK: xla_hlo.constant dense<[6, 8, 10, 12]> + %2 = "xla_hlo.add"(%0, %1) : (tensor<4xi64>, tensor<4xi64>) -> (tensor<4xi64>) + return %2 : tensor<4xi64> +} + +// CHECK-LABEL: add_scalar_fold +func @add_scalar_fold() -> tensor<4xi64> { + %0 = xla_hlo.constant dense<1> : tensor<4xi64> + %1 = xla_hlo.constant dense<5> : tensor<4xi64> + // CHECK: xla_hlo.constant dense<6> + %2 = "xla_hlo.add"(%0, %1) : (tensor<4xi64>, tensor<4xi64>) -> (tensor<4xi64>) + return %2 : tensor<4xi64> +} + +// CHECK-LABEL: add_fold_float +func @add_fold_float() -> tensor<4xf64> { + %0 = xla_hlo.constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf64> + %1 = xla_hlo.constant dense<[5.0, 6.0, 7.0, 8.0]> : tensor<4xf64> + // CHECK: xla_hlo.constant dense<[6.000000e+00, 8.000000e+00, 1.000000e+01, 1.200000e+01]> + %2 = "xla_hlo.add"(%0, %1) : (tensor<4xf64>, tensor<4xf64>) -> (tensor<4xf64>) + return %2 : tensor<4xf64> 
+} + +// CHECK-LABEL: sub_scalar_fold +func @sub_scalar_fold() -> tensor<4xi64> { + %0 = xla_hlo.constant dense<5> : tensor<4xi64> + %1 = xla_hlo.constant dense<1> : tensor<4xi64> + // CHECK: xla_hlo.constant dense<4> + %2 = "xla_hlo.subtract"(%0, %1) : (tensor<4xi64>, tensor<4xi64>) -> (tensor<4xi64>) + return %2 : tensor<4xi64> +} + +// CHECK-LABEL: multiply_scalar_fold +func @multiply_scalar_fold() -> tensor<4xi64> { + %0 = xla_hlo.constant dense<5> : tensor<4xi64> + %1 = xla_hlo.constant dense<3> : tensor<4xi64> + // CHECK: xla_hlo.constant dense<15> + %2 = "xla_hlo.multiply"(%0, %1) : (tensor<4xi64>, tensor<4xi64>) -> (tensor<4xi64>) + return %2 : tensor<4xi64> +} + // CHECK-LABEL: concatenate_noop func @concatenate_noop(%arg0: tensor<4xi32>) -> tensor<4xi32> { // CHECK-SAME: [[ARG:%.+]]: tensor<4xi32> @@ -43,6 +88,54 @@ func @concatenate_empty_float(%arg0: tensor<0xf32>, %arg1: tensor<0xf32>) -> ten return %0 : tensor<0xf32> } +// CHECK-LABEL: concatenate_const_1D +func @concatenate_const_1D() -> tensor<4xi32> { + // CHECK: [[VAL:%.+]]= xla_hlo.constant dense<[0, 1, 2, 3]> + %0 = xla_hlo.constant dense<[0, 1]> : tensor<2xi32> + %1 = xla_hlo.constant dense<[2, 3]> : tensor<2xi32> + %2 = "xla_hlo.concatenate"(%0, %1) { dimension = 0 : i64 } : (tensor<2xi32>, tensor<2xi32>) -> tensor<4xi32> + + // CHECK: return [[VAL]] + return %2 : tensor<4xi32> +} + +// CHECK-LABEL: concatenate_const_1D_float +func @concatenate_const_1D_float() -> tensor<4xf32> { + // CHECK: [[VAL:%.+]] = xla_hlo.constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00]> + + %0 = xla_hlo.constant dense<[0.0, 1.0]> : tensor<2xf32> + %1 = xla_hlo.constant dense<[2.0, 3.0]> : tensor<2xf32> + %2 = "xla_hlo.concatenate"(%0, %1) { dimension = 0 : i64 } : (tensor<2xf32>, tensor<2xf32>) -> tensor<4xf32> + + // CHECK: return [[VAL]] + return %2 : tensor<4xf32> +} + +// CHECK-LABEL: concatenate_const_2D_vertical +func @concatenate_const_2D_vertical() -> tensor<2x2xi32> { + // CHECK: [[VAL:%.+]]= xla_hlo.constant dense<[ + // CHECK-SAME: [0, 1], [2, 3] + // CHECK-SAME: ]> + %0 = xla_hlo.constant dense<[[0, 1]]> : tensor<1x2xi32> + %1 = xla_hlo.constant dense<[[2, 3]]> : tensor<1x2xi32> + %2 = "xla_hlo.concatenate"(%0, %1) { dimension = 0 : i64 } : (tensor<1x2xi32>, tensor<1x2xi32>) -> tensor<2x2xi32> + + // CHECK: return [[VAL]] + return %2 : tensor<2x2xi32> +} + +// CHECK-LABEL: concatenate_const_2D_horizontal +func @concatenate_const_2D_horizontal() -> tensor<2x2xi32> { + // CHECK: [[VAL:%.+]]= xla_hlo.constant dense<[ + // CHECK-SAME: [0, 2], [1, 3] + // CHECK-SAME: ]> + %0 = xla_hlo.constant dense<[[0], [1]]> : tensor<2x1xi32> + %1 = xla_hlo.constant dense<[[2], [3]]> : tensor<2x1xi32> + %2 = "xla_hlo.concatenate"(%0, %1) { dimension = 1 : i64 } : (tensor<2x1xi32>, tensor<2x1xi32>) -> tensor<2x2xi32> + + // CHECK: return [[VAL]] + return %2 : tensor<2x2xi32> +} // CHECK-LABEL: dynamic_slice_variable_start func @dynamic_slice_variable_start(%arg0: tensor<3x4xi32>, %arg1: tensor, %arg2: tensor) -> tensor<1x4xi32> { diff --git a/tensorflow/compiler/mlir/xla/tests/chlo_infer_shape_type_methods.mlir b/tensorflow/compiler/mlir/xla/tests/chlo_infer_shape_type_methods.mlir index ce0243e416c..d67a7d09f7c 100644 --- a/tensorflow/compiler/mlir/xla/tests/chlo_infer_shape_type_methods.mlir +++ b/tensorflow/compiler/mlir/xla/tests/chlo_infer_shape_type_methods.mlir @@ -6,8 +6,8 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor, // CHECK-SAME: %[[ARG1:.+]]: tensor func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor<1xindex> { - 
// CHECK-DAG: %[[ARG0_S:.+]] = "shape.shape_of"(%[[ARG0]]) - // CHECK-DAG: %[[ARG1_S:.+]] = "shape.shape_of"(%[[ARG1]]) + // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-DAG: %[[BCAST_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) // CHECK: %[[EXTENTS:.+]] = "shape.to_extent_tensor"(%[[BCAST_S]]) // CHECK: return %[[EXTENTS]] diff --git a/tensorflow/compiler/mlir/xla/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/xla/tests/chlo_legalize_to_hlo_broadcasts.mlir index 2bc1e0c6852..7194f7034b5 100644 --- a/tensorflow/compiler/mlir/xla/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/xla/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -14,8 +14,8 @@ func @addWithoutBroadcast(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor< // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor { - // CHECK-DAG: %[[ARG0_S:.+]] = "shape.shape_of"(%[[ARG0]]) - // CHECK-DAG: %[[ARG1_S:.+]] = "shape.shape_of"(%[[ARG1]]) + // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-DAG: %[[RESULT_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) // CHECK: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_S]]) // CHECK-DAG: %[[ARG0_B:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} @@ -31,8 +31,8 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor // CHECK-SAME: %[[ARG1:.+]]: tensor func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> tensor> { - // CHECK-DAG: %[[ARG0_S:.+]] = "shape.shape_of"(%[[ARG0]]) - // CHECK-DAG: %[[ARG1_S:.+]] = "shape.shape_of"(%[[ARG1]]) + // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-DAG: %[[RESULT_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) // CHECK: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_S]]) // CHECK-DAG: %[[ARG0_B:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -48,8 +48,8 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> tensor { - // CHECK-DAG: %[[ARG0_S:.+]] = "shape.shape_of"(%[[ARG0]]) - // CHECK-DAG: %[[ARG1_S:.+]] = "shape.shape_of"(%[[ARG1]]) + // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-DAG: %[[RESULT_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) // CHECK: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_S]]) // CHECK-DAG: %[[ARG0_B:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor diff --git a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir index 262533bbf08..53296b257ae 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir @@ -1,4 +1,4 @@ -// RUN: xla-opt -hlo-legalize-to-lhlo %s -o - | FileCheck %s --dump-input-on-failure +// RUN: xla-opt -hlo-legalize-to-lhlo 
-buffer-placement %s -o - | FileCheck %s --dump-input-on-failure // CHECK-LABEL: func @attrs func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { @@ -13,33 +13,42 @@ func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { // ----- +func @return_func(%arg0: tensor<4xf32>) -> tensor<4xf32> { + return %arg0 : tensor<4xf32> +} +// CHECK: (%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[TYPE]]) +// CHECK-NEXT: "xla_lhlo.copy"(%[[ARG0]], %[[RESULT]]) : ([[TYPE]], [[TYPE]]) -> () +// CHECK-NEXT: "xla_lhlo.terminator"() : () -> () + +// ----- + // CHECK-LABEL: func @func_op_long func @func_op_long(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> { - // CHECK: (%[[NEW_ARG0:.*]]: memref<4xf32>, %[[NEW_ARG1:.*]]: memref<4xf32>, %[[RESULT:.*]]: memref<4xf32>) - // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[SUB_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[MIN_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> - // CHECK-NEXT: %[[MAX_RESULT:.*]] = alloc() {temp = true} : memref<4xf32> %1 = xla_hlo.maximum %arg0, %arg1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.maximum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MAX_RESULT]]) %2 = xla_hlo.add %arg0, %1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.add"(%[[NEW_ARG0]], %[[MAX_RESULT]], %[[ADD_RESULT]]) %3 = xla_hlo.minimum %arg0, %arg1 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.minimum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MIN_RESULT]]) %4 = xla_hlo.subtract %arg1, %3 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.subtract"(%[[NEW_ARG1]], %[[MIN_RESULT]], %[[SUB_RESULT]]) %5 = xla_hlo.multiply %2, %4 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %[[SUB_RESULT]], %[[MUL_RESULT]]) - // CHECK-NEXT: dealloc %[[MAX_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[MIN_RESULT]] : memref<4xf32> - // CHECK-NEXT: dealloc %[[SUB_RESULT]] : memref<4xf32> - // CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) : (memref<4xf32>, memref<4xf32>) -> () - // CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<4xf32> return %5 : tensor<4xf32> - // CHECK-NEXT: "xla_lhlo.terminator"() : () -> () } +// CHECK: (%[[NEW_ARG0:.*]]: memref<4xf32>, %[[NEW_ARG1:.*]]: memref<4xf32>, %[[RESULT:.*]]: memref<4xf32>) +// CHECK-NEXT: %[[MAX_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.maximum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MAX_RESULT]]) +// CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.add"(%[[NEW_ARG0]], %[[MAX_RESULT]], %[[ADD_RESULT]]) +// CHECK-NEXT: dealloc %[[MAX_RESULT]] : memref<4xf32> +// CHECK-NEXT: %[[MIN_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.minimum"(%[[NEW_ARG0]], %[[NEW_ARG1]], %[[MIN_RESULT]]) +// CHECK-NEXT: %[[SUB_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.subtract"(%[[NEW_ARG1]], %[[MIN_RESULT]], %[[SUB_RESULT]]) +// CHECK-NEXT: dealloc %[[MIN_RESULT]] : memref<4xf32> +// CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %[[SUB_RESULT]], %[[MUL_RESULT]]) +// CHECK-NEXT: dealloc %[[SUB_RESULT]] : memref<4xf32> +// CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<4xf32> +// CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) : (memref<4xf32>, memref<4xf32>) -> () +// CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<4xf32> +// CHECK-NEXT: 
"xla_lhlo.terminator"() : () -> () // ----- @@ -47,20 +56,20 @@ func @func_op_long(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> func @fusion(%multiplier: memref<2x2xf32>, %summand_1: memref<2x2xf32>, %summand_2: memref<2x2xf32>, %result: memref<2x2xf32>) { // CHECK: (%{{.*}}: {{.*}}, {{.*}}: {{.*}}, {{.*}}: {{.*}}, %[[RESULT:.*]]: {{.*}}) - // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() {temp = true} : memref<2x2xf32> - // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() {temp = true} : memref<2x2xf32> + // CHECK-NEXT: %[[ADD_RESULT:.*]] = alloc() : memref<2x2xf32> %tensor_summand_1 = tensor_load %summand_1 : memref<2x2xf32> %tensor_summand_2 = tensor_load %summand_2 : memref<2x2xf32> %sum = "xla_hlo.add"(%tensor_summand_1, %tensor_summand_2) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK-NEXT: "xla_lhlo.add"(%{{.*}}, %{{.*}}, %[[ADD_RESULT]]) + // CHECK-NEXT: %[[MUL_RESULT:.*]] = alloc() : memref<2x2xf32> %tensor_multiplier = tensor_load %multiplier : memref<2x2xf32> %tensor_result = "xla_hlo.multiply"(%sum, %tensor_multiplier) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK-NEXT: "xla_lhlo.multiply"(%[[ADD_RESULT]], %{{.*}}, %[[MUL_RESULT]]) + // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<2x2xf32> // CHECK-NEXT: "xla_lhlo.copy"(%[[MUL_RESULT]], %[[RESULT]]) tensor_store %tensor_result, %result : memref<2x2xf32> - // CHECK-NEXT: dealloc %[[ADD_RESULT]] : memref<2x2xf32> // CHECK-NEXT: dealloc %[[MUL_RESULT]] : memref<2x2xf32> // CHECK-NEXT: "xla_lhlo.terminator"() : () -> () "xla_lhlo.terminator"() : () -> () diff --git a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir index ca8e64b9141..a856ee5e83c 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-linalg.mlir @@ -222,6 +222,16 @@ func @float_cos(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { // ----- +// CHECK-LABEL: func @float_sin +func @float_sin(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + // CHECK: linalg.generic + // CHECK: sin + %0 = "xla_hlo.sine"(%arg0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + // CHECK-LABEL: func @copy // CHECK-SAME: [[ARG:%[a-zA-Z0-9]+]] func @copy(%input: tensor<2x4x8xf32>) -> tensor<2x4x8xf32> { @@ -274,8 +284,8 @@ func @broadcast(%arg: tensor<4x?x16xf32>) -> tensor<4x2x1x4x?x16xf32> { // CHECK-DAG: #[[OPERAND_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d4, d0, 0)> // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK-LABEL: func @broadcast -func @broadcast(%operand: tensor<5x7x1xf32>) -> tensor<7x10x6x4x5xf32> { +// CHECK-LABEL: func @broadcast_in_dim +func @broadcast_in_dim(%operand: tensor<5x7x1xf32>) -> tensor<7x10x6x4x5xf32> { %0 = "xla_hlo.broadcast_in_dim"(%operand) {broadcast_dimensions = dense<[4,0,2]> : tensor<3xi64>} : (tensor<5x7x1xf32>) -> tensor<7x10x6x4x5xf32> @@ -287,6 +297,22 @@ func @broadcast(%operand: tensor<5x7x1xf32>) -> tensor<7x10x6x4x5xf32> { // ----- +// CHECK-DAG: #[[OPERAND_MAP:.+]] = affine_map<(d0, d1) -> (d0)> +// CHECK-DAG: #[[RESULT_MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-LABEL: func @broadcast_in_dim_with_one_to_one +func @broadcast_in_dim_with_one_to_one( + %operand: tensor<1xf32>) -> tensor<1x5xf32> { + %0 = "xla_hlo.broadcast_in_dim"(%operand) + {broadcast_dimensions = dense<[0]> : tensor<1xi64>} + : (tensor<1xf32>) -> tensor<1x5xf32> + return %0 : 
tensor<1x5xf32> +} +// CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] +// CHECK-NEXT: ^bb0(%[[OPERAND:.*]]: f32): +// CHECK-NEXT: linalg.yield %[[OPERAND]] : f32 + +// ----- + // CHECK-DAG: #[[OPERAND_MAP:.*]] = affine_map<(d0, d1, d2) -> ()> // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @broadcast_scalar @@ -444,3 +470,75 @@ func @reshape_multiple_collapse // CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)> // CHECK-LABEL: func @reshape_multiple_collapse // CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] + +// ----- + +// CHECK-LABEL: func @convert_i32_to_f32 +func @convert_i32_to_f32(%input: tensor<2x2xi32>) -> tensor<2x2xf32> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xi32>) -> tensor<2x2xf32> + return %result : tensor<2x2xf32> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: i32): +// CHECK-NEXT: %[[RESULT:.*]] = sitofp %[[OPERAND_IN]] : i32 to f32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : f32 + +// ----- + +// CHECK-LABEL: func @convert_i16_to_i32 +func @convert_i16_to_i32(%input: tensor<2x2xi16>) -> tensor<2x2xi32> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xi16>) -> tensor<2x2xi32> + return %result : tensor<2x2xi32> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: i16): +// CHECK-NEXT: %[[RESULT:.*]] = zexti %[[OPERAND_IN]] : i16 to i32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i32 + +// ----- + +// CHECK-LABEL: func @convert_i32_to_i16 +func @convert_i32_to_i16(%input: tensor<2x2xi32>) -> tensor<2x2xi16> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xi32>) -> tensor<2x2xi16> + return %result : tensor<2x2xi16> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: i32): +// CHECK-NEXT: %[[RESULT:.*]] = trunci %[[OPERAND_IN]] : i32 to i16 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i16 + +// ----- + +// CHECK-LABEL: func @convert_f32_to_f64 +func @convert_f32_to_f64(%input: tensor<2x2xf32>) -> tensor<2x2xf64> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xf32>) -> tensor<2x2xf64> + return %result : tensor<2x2xf64> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32): +// CHECK-NEXT: %[[RESULT:.*]] = fpext %[[OPERAND_IN]] : f32 to f64 +// CHECK-NEXT: linalg.yield %[[RESULT]] : f64 + +// ----- + +// CHECK-LABEL: func @convert_f64_to_f32 +func @convert_f64_to_f32(%input: tensor<2x2xf64>) -> tensor<2x2xf32> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xf64>) -> tensor<2x2xf32> + return %result : tensor<2x2xf32> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f64): +// CHECK-NEXT: %[[RESULT:.*]] = fptrunc %[[OPERAND_IN]] : f64 to f32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : f32 + +// ----- + +// CHECK-LABEL: func @convert_f32_to_i32 +func @convert_f32_to_i32(%input: tensor<2x2xf32>) -> tensor<2x2xi32> { + %result = "xla_hlo.convert"(%input) : (tensor<2x2xf32>) -> tensor<2x2xi32> + return %result : tensor<2x2xi32> +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32): +// CHECK-NEXT: %[[RESULT:.*]] = fptosi %[[OPERAND_IN]] : f32 to i32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i32 diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir new file mode 100644 index 00000000000..149c0c94663 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir @@ -0,0 +1,307 @@ +// 
RUN: xla-opt -split-input-file -xla-hlo-to-lhlo-with-xla %s | FileCheck --enable-var-scope --dump-input=fail %s + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.abs +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %abs = "xla_hlo.abs"(%value) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %abs : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>, %value1: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.add +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.add"(%value0, %value1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xi32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xi32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xi32>, %value1: tensor<2x2xi32>) -> tensor<2x2xi32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xi32> +// CHECK: lhlo.and +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.and"(%value0, %value1) : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32> + return %res : tensor<2x2xi32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.ceil +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.ceil"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<1x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<1x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<1x2xf32>, %value1: tensor<1x2xf32>) -> tensor<1x2xcomplex> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<1x2xcomplex> +// CHECK: lhlo.complex +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.complex"(%value0, %value1) : (tensor<1x2xf32>, tensor<1x2xf32>) -> (tensor<1x2xcomplex>) + return %res : tensor<1x2xcomplex> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<1x2xcomplex> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<1x2xcomplex>) -> tensor<1x2xcomplex> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<1x2xcomplex> +// CHECK: lhlo.cosine +// CHECK-SAME: %[[ARG0]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.cosine"(%value0) : (tensor<1x2xcomplex>) -> tensor<1x2xcomplex> + return %res : tensor<1x2xcomplex> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>, %value1: 
tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.divide +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.divide"(%value0, %value1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.exponential +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.exponential"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.log +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.log"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>, %value1: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.maximum +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.maximum"(%value0, %value1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>, %value1: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.minimum +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.minimum"(%value0, %value1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>, %value1: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.multiply +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.multiply"(%value0, %value1) : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.negate +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.negate"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: 
%[[ARG0:.*]]: memref<1x2xcomplex> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<8xi8> +func @main(%value0: tensor<1x2xcomplex>) -> tensor<1x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<8xi8> to memref<1x2xf32> +// CHECK: lhlo.real +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.real"(%value0) : (tensor<1x2xcomplex>) -> (tensor<1x2xf32>) + return %res : tensor<1x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<1x2xcomplex> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<8xi8> +func @main(%value0: tensor<1x2xcomplex>) -> tensor<1x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<8xi8> to memref<1x2xf32> +// CHECK: lhlo.imag +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.imag"(%value0) : (tensor<1x2xcomplex>) -> (tensor<1x2xf32>) + return %res : tensor<1x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xi32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xi32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xi32>, %value1: tensor<2x2xi32>) -> tensor<2x2xi32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xi32> +// CHECK: lhlo.remainder +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.remainder"(%value0, %value1) : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32> + return %res : tensor<2x2xi32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.rsqrt +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.rsqrt"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xi1> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xf32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<2x2xf32> {xla_lhlo.params = 2 +// CHECK-SAME: %[[ARG3:.*]]: memref<16xi8> +func @main(%pred: tensor<2x2xi1>, %lhs: tensor<2x2xf32>, %rhs: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.select +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]], %[[VIEW]] +// CHECK-NEXT: return + %0 = "xla_hlo.select"(%pred, %lhs, %rhs) : (tensor<2x2xi1>, tensor<2x2xf32>, tensor<2x2xf32>) -> (tensor<2x2xf32>) + return %0 : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.sign +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.sign"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.sqrt +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.sqrt"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @main +// 
CHECK-SAME: %[[ARG0:.*]]: memref<2x2xi32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<2x2xi32> {xla_lhlo.params = 1 +// CHECK-SAME: %[[ARG2:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xi32>, %value1: tensor<2x2xi32>) -> tensor<2x2xi32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xi32> +// CHECK: lhlo.subtract +// CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[VIEW]] +// CHECK-NEXT: return + %res = "xla_hlo.subtract"(%value0, %value1) : (tensor<2x2xi32>, tensor<2x2xi32>) -> tensor<2x2xi32> + return %res : tensor<2x2xi32> +} + +// ----- + +// CHECK-LABEL: func @main +// CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {xla_lhlo.params = 0 +// CHECK-SAME: %[[ARG1:.*]]: memref<16xi8> +func @main(%value0: tensor<2x2xf32>) -> tensor<2x2xf32> { +// CHECK: %[[VIEW:.*]] = {{.*}} memref<16xi8> to memref<2x2xf32> +// CHECK: lhlo.tanh +// CHECK-SAME: %[[ARG0]], %[[VIEW]] + %res = "xla_hlo.tanh"(%value0) : (tensor<2x2xf32>) -> tensor<2x2xf32> + return %res : tensor<2x2xf32> +} diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir index cda1dc481a7..6a2b68adac3 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir @@ -8,7 +8,9 @@ // CHECK-SAME: ) { func @main(%value: tensor<2x2xf32>) -> tensor<2x2xf32> { // The only expected instruction is a copy from the input into the output. - // CHECK: %[[OUTPUT:.*]] = std.view %[[ARG1]][][] : memref<16xi8> to memref<2x2xf32> + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C02:.*]] = constant 0 : index + // CHECK: %[[OUTPUT:.*]] = std.view %[[ARG1]][%[[C02]]][] : memref<16xi8> to memref<2x2xf32> // CHECK: xla_lhlo.copy // CHECK-SAME: %[[ARG0]], %[[OUTPUT]] return %value : tensor<2x2xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir index 08df9fd3808..3605e2a0d5c 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir @@ -7,8 +7,8 @@ func @batchmatmulv2_basic(%arg0: tensor<1x4x2xf32>, %arg1: tensor<3x2x4xf32>) -> tensor<3x4x4xf32> { // CHECK-LABEL: func @batchmatmulv2_basic // CHECK-SAME: ([[LHS:%.*]]: tensor<1x4x2xf32>, [[RHS:%.*]]: tensor<3x2x4xf32>) -> tensor<3x4x4xf32> -// CHECK: [[LHSSHAPE:%.*]] = "shape.shape_of"([[LHS]]) : (tensor<1x4x2xf32>) -> !shape.shape -// CHECK: [[RHSSHAPE:%.*]] = "shape.shape_of"([[RHS]]) : (tensor<3x2x4xf32>) -> !shape.shape +// CHECK: [[LHSSHAPE:%.*]] = shape.shape_of [[LHS]] : tensor<1x4x2xf32> +// CHECK: [[RHSSHAPE:%.*]] = shape.shape_of [[RHS]] : tensor<3x2x4xf32> // CHECK: [[CM2:%.*]] = constant -2 : i32 // CHECK: [[LHSHEAD:%.*]], [[LHSTAIL:%.*]] = "shape.split_at"([[LHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) // CHECK: [[RHSHEAD:%.*]], [[RHSTAIL:%.*]] = "shape.split_at"([[RHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) @@ -86,8 +86,8 @@ func @batchmatmulv2_adj_complex(%arg0: tensor<5x2xcomplex>, %arg1: tensor<2 // CHECK: [[RHSIM:%.*]] = "xla_hlo.imag"([[RHS]]) // CHECK: [[RHSIMNEG:%.*]] = "xla_hlo.negate"([[RHSIM]]) // CHECK: [[RHSCONJ:%.*]] = "xla_hlo.complex"([[RHSRE]], [[RHSIMNEG]]) -// CHECK: "shape.shape_of"([[LHSCONJ]]) -// CHECK: "shape.shape_of"([[RHSCONJ]]) +// CHECK: shape.shape_of [[LHSCONJ]] +// CHECK: shape.shape_of 
[[RHSCONJ]] %0 = "tf.BatchMatMulV2"(%arg0, %arg1) {adj_x = true, adj_y = true, device = ""} : (tensor<5x2xcomplex>, tensor<2x4xcomplex>) -> tensor<5x4xcomplex> return %0 : tensor<5x4xcomplex> } diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir index d2b4d269fef..0660af4ed1c 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-full-conversion.mlir @@ -1,22 +1,24 @@ // RUN: tf-opt %s -xla-legalize-tf -split-input-file -verify-diagnostics +// expected-error@below{{The following operations cannot be legalized: tf.NoOp (count: 1); tf_executor.fetch (count: 1); tf_executor.graph (count: 1); tf_executor.island (count: 1); tf_executor.yield (count: 1). These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} +// expected-error@below{{Emitting more detail about one op that failed to legalize...}} func @tf_executor_graph_op() { - // expected-error@+1 {{failed to legalize operation 'tf_executor.graph'}} tf_executor.graph { %0 = tf_executor.island { + // expected-error@+1 {{'tf.NoOp' op is not legalizable}} "tf.NoOp"() {} : () -> () tf_executor.yield } tf_executor.fetch } return - } // ----- +// expected-error@below{{The following operations cannot be legalized: tf.OpA (count: 1). These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} func @tf_unknown_op(%arg0: tensor<2xi32>) -> tensor<2xi32> { - // expected-error@+1 {{failed to legalize operation 'tf.OpA'}} + // expected-error@+1 {{'tf.OpA' op is not legalizable}} %0 = "tf.OpA"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> return %0: tensor<2xi32> } @@ -27,3 +29,16 @@ func @tf_known_op(%arg0: tensor<2xi32>) -> tensor<2xi32> { %0 = "tf.Add"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> return %0: tensor<2xi32> } + +// ----- + +// expected-error@below{{The following operations cannot be legalized: tf.OpA (count: 1); tf.OpB (count: 2). 
These legalization failure(s) may be due to missing TF to HLO lowerings and/or unsupported attributes, etc.}} +// expected-error@below{{Emitting more detail about one op that failed to legalize...}} +func @tf_unknown_known_mix(%arg0: tensor<2xi32>) -> tensor<2xi32> { + // expected-error@+1 {{'tf.OpA' op is not legalizable}} + %0 = "tf.OpA"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %1 = "tf.OpB"(%0, %0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %2 = "tf.Add"(%1, %1) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + %3 = "tf.OpB"(%2, %2) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi32> + return %2: tensor<2xi32> +} diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir index d2ce1d311f6..e8d5cfe997d 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir @@ -50,6 +50,15 @@ func @dynamic_operand(%arg0: tensor) -> tensor { return %0 : tensor } +// CHECK-LABEL: unsupported_dtype +func @unsupported_dtype(%arg0: tensor<2x!tf.variant>) -> tensor<2x!tf.variant> { + // CHECK: tf.AddN + // expected-remark@+1 {{unsupported type: tensor<2x!tf.variant>}} + %0 = "tf.AddN"(%arg0, %arg0) : (tensor<2x!tf.variant>, tensor<2x!tf.variant>) -> tensor<2x!tf.variant> + + return %0 : tensor<2x!tf.variant> +} + // CHECK-LABEL: multiple_dialect_ops func @multiple_dialect_ops(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: xla_hlo.negate @@ -115,12 +124,68 @@ func @greater(%arg0: tensor<2xi32>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// TODO(hinsu): Add a test with variant type once one of the ops supporting -// the type is whitelisted. It should be rejected with unsupported type remark. +// CHECK-LABEL: func @const_inputs +// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x2xf64>, %[[ARG1:.*]]: tensor, +func @const_inputs(%arg0: tensor<2x2xf64>, %arg1: tensor, %arg2: tensor<2xi32>, %arg3: tensor<2xi32>, %arg4: tensor<2xi32>) -> tensor<6x5xf64> { -// TODO(hinsu): Add a test with uint8 type once one of the ops supporting the -// type is whitelisted. Unsigned types are not yet added to the HLO dialect so -// it should return an error. 
See b/130356985 + // CHECK: "xla_hlo.pad"(%[[ARG0]], %[[ARG1]]) + // CHECK-SAME-DAG: edge_padding_high = dense<[1, 2]> : tensor<2xi64> + // CHECK-SAME-DAG: edge_padding_low = dense<[2, 1]> : tensor<2xi64> + // CHECK-SAME-DAG: interior_padding = dense<[1, 0]> : tensor<2xi64> + + %0 = xla_hlo.constant dense<[2, 1]> : tensor<2xi32> + %1 = xla_hlo.constant dense<[1, 2]> : tensor<2xi32> + %2 = xla_hlo.constant dense<[1, 0]> : tensor<2xi32> + %3 = "tf.XlaPad"(%arg0, %arg1, %0, %1, %2) : (tensor<2x2xf64>, tensor, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<6x5xf64> + return %3 : tensor<6x5xf64> +} + +func @non_const_inputs(%arg0: tensor<2x2xf64>, %arg1: tensor, %arg2: tensor<2xi32>, %arg3: tensor<2xi32>, %arg4: tensor<2xi32>) -> tensor<6x5xf64> { + // expected-remark@+1 {{lowering requires operand #2 to be a constant}} + %0 = "tf.XlaPad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor<2x2xf64>, tensor, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor<6x5xf64> + return %0 : tensor<6x5xf64> +} + +// CHECK-LABEL: dynamic_result_type +func @dynamic_result_type(%arg0: tensor<2xf32>) -> tensor<*xf32> { + // CHECK: %[[RESULT:.*]] = "xla_hlo.abs"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> + // CHECK: tensor_cast %0 : tensor<2xf32> to tensor<*xf32> + %0 = "tf.Abs"(%arg0) : (tensor<2xf32>) -> tensor<*xf32> + + // return %[[RESULT]] + return %0 : tensor<*xf32> +} + +func @truncated_normal() -> tensor<2x2xf32> { + // CHECK-NOT: tf.TruncatedNormal + %0 = xla_hlo.constant dense<[2, 2]> : tensor<2xi32> + %1 = "tf.TruncatedNormal"(%0) {T = i32, device = "", dtype = f32, seed = 0 : i64, seed2 = 1950157571 : i64} : (tensor<2xi32>) -> tensor<2x2xf32> + return %1 : tensor<2x2xf32> +} + +// CHECK-LABEL: dynamic_update_slice +// CHECK-SAME: (%[[ARG0:.*]]: tensor<3x4xi32>, %[[ARG1:.*]]: tensor<2x2xi32>, %[[ARG2:.*]]: tensor<2xi32> +func @dynamic_update_slice(%arg0: tensor<3x4xi32>, %arg1: tensor<2x2xi32>, %arg2: tensor<2xi32>) -> tensor<3x4xi32> { + + // CHECK: %[[SLICE0:.*]] = "xla_hlo.slice"(%[[ARG2]]) + // CHECK-DAG-SAME: start_indices = dense<0> : tensor<1xi64> + // CHECK-DAG-SAME: limit_indices = dense<1> : tensor<1xi64> + // CHECK-DAG-SAME: strides = dense<1> : tensor<1xi64> + // CHECK-SAME: (tensor<2xi32>) -> tensor<1xi32> + // CHECK: %[[DIM0:.*]] = "xla_hlo.reshape"(%[[SLICE0]]) : (tensor<1xi32>) -> tensor + + // CHECK: %[[SLICE1:.*]] = "xla_hlo.slice"(%[[ARG2]]) + // CHECK-DAG-SAME: start_indices = dense<1> : tensor<1xi64> + // CHECK-DAG-SAME: limit_indices = dense<2> : tensor<1xi64> + // CHECK-DAG-SAME: strides = dense<1> : tensor<1xi64> + // CHECK-SAME: (tensor<2xi32>) -> tensor<1xi32> + // CHECK: %[[DIM1:.*]] = "xla_hlo.reshape"(%[[SLICE1]]) : (tensor<1xi32>) -> tensor + + // CHECK: "xla_hlo.dynamic-update-slice"(%[[ARG0]], %[[ARG1]], %[[DIM0]], %[[DIM1]]) + + %0 = "tf.XlaDynamicUpdateSlice"(%arg0, %arg1, %arg2) : (tensor<3x4xi32>, tensor<2x2xi32>, tensor<2xi32>) -> tensor<3x4xi32> + return %0: tensor<3x4xi32> +} // TODO(hinsu): Add a test with a valid TF op for which tf2xla kernel is // available but doesn't support this instance. diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index e15101a165e..450910b2e4d 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -426,6 +426,8 @@ func @biasAdd_dynamic(%arg0: tensor, %arg1: tensor) -> tenso //===----------------------------------------------------------------------===// // Binary op legalizations. 
+// Most of these expand from the same pattern. Full semantics are +// verified for tf.Add and pattern application only for the rest. //===----------------------------------------------------------------------===// // CHECK-LABEL: func @add @@ -439,19 +441,49 @@ func @add(%arg0: tensor<2xi32>) -> tensor<2xi32> { } // CHECK-LABEL: func @broadcast_add +// TODO(laurenzo): Change this to a (5 + 2x1) shaped add to make the check +// patterns unambiguous and more interesting (once broadcastable trait is +// fixed upstream). func @broadcast_add(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { - // CHECK-NEXT: "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} + // CHECK: %[[UNUSED_LHS_SHAPE:.+]] = shape.const_shape [1] + // CHECK: %[[UNUSED_RHS_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK: %[[RESULT_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} + // CHECK: xla_hlo.add %[[LHS_BCAST]], %[[RHS_BCAST]] %0 = "tf.Add"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> return %0: tensor<1x2xi32> } // CHECK-LABEL: func @broadcast_multi_dim_add +// TODO(laurenzo): Change this to a (4x1x1 + 1x4x4x4) shaped add once upstream +// broadcastable bug is fixed (helps make the CHECK matching unambiguous) func @broadcast_multi_dim_add(%arg0: tensor<4x1x1xi32>, %arg1: tensor<4x4x4x4xi32>) -> tensor<4x4x4x4xi32> { - // CHECK-NEXT: "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<[1, 2, 3]> : tensor<3xi64>} + // CHECK: %[[UNUSED_LHS_SHAPE:.+]] = shape.const_shape [4, 1, 1] + // CHECK: %[[UNUSED_RHS_SHAPE:.+]] = shape.const_shape [4, 4, 4, 4] + // CHECK: %[[RESULT_SHAPE:.+]] = shape.const_shape [4, 4, 4, 4] + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[1, 2, 3]> : tensor<3xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1, 2, 3]> : tensor<4xi64>} + // CHECK: xla_hlo.add %[[LHS_BCAST]], %[[RHS_BCAST]] %0 = "tf.Add"(%arg0, %arg1) : (tensor<4x1x1xi32>, tensor<4x4x4x4xi32>) -> tensor<4x4x4x4xi32> return %0: tensor<4x4x4x4xi32> } +// CHECK-LABEL: func @add_dynamic +func @add_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK-DAG: %[[LHS_SHAPE:.+]] = shape.shape_of %arg0 + // CHECK-DAG: %[[RHS_SHAPE:.+]] = shape.shape_of %arg1 + // CHECK-DAG: %[[RESULT_SHAPE:.+]] = "shape.broadcast"(%[[LHS_SHAPE]], %[[RHS_SHAPE]]) + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} + // CHECK: xla_hlo.add %4, %5 : tensor + %0 = "tf.Add"(%arg0, %arg1) : (tensor, tensor) -> tensor + return %0: tensor +} + // CHECK-LABEL: func @div func @div(%arg0: tensor<2xi32>) -> tensor<2xi32> { // CHECK-NEXT: %0 = 
xla_hlo.divide %arg0, %arg0 : tensor<2xi32> @@ -460,13 +492,6 @@ func @div(%arg0: tensor<2xi32>) -> tensor<2xi32> { return %0: tensor<2xi32> } -// CHECK-LABEL: func @broadcast_div -func @broadcast_div(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { - // CHECK-NEXT: "xla_hlo.divide"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.Div"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> - return %0: tensor<1x2xi32> -} - // CHECK-LABEL: func @shift_left func @shift_left(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { // CHECK: xla_hlo.shift_left %arg0, %arg1 : tensor<4xi32> @@ -474,13 +499,6 @@ func @shift_left(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { return %0 : tensor<4xi32> } -// CHECK-LABEL: func @div_dynamic -func @div_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { - // CHECK: "xla_hlo.divide"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.Div"(%arg0, %arg1) : (tensor, tensor) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @div_unranked func @div_unranked(%arg0: tensor<*xi32>, %arg1: tensor) -> tensor { // CHECK: tf.Div @@ -510,13 +528,6 @@ func @mul(%arg0: tensor<2xi32>) -> tensor<2xi32> { return %0: tensor<2xi32> } -// CHECK-LABEL: func @broadcast_mul -func @broadcast_mul(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { - // CHECK-NEXT: "xla_hlo.multiply"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> - return %0: tensor<1x2xi32> -} - // CHECK-LABEL: func @real_div func @real_div(%arg0: tensor<2xi32>) -> tensor<2xi32> { // CHECK-NEXT: %0 = xla_hlo.divide %arg0, %arg0 : tensor<2xi32> @@ -524,13 +535,6 @@ func @real_div(%arg0: tensor<2xi32>) -> tensor<2xi32> { return %0: tensor<2xi32> } -// CHECK-LABEL: func @broadcast_real_div -func @broadcast_real_div(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { - // CHECK-NEXT: "xla_hlo.divide"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.RealDiv"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> - return %0: tensor<1x2xi32> -} - // CHECK-LABEL: func @sub func @sub(%arg0: tensor<2xi32>) -> tensor<2xi32> { // CHECK-NEXT: %0 = xla_hlo.subtract %arg0, %arg0 : tensor<2xi32> @@ -539,13 +543,6 @@ func @sub(%arg0: tensor<2xi32>) -> tensor<2xi32> { return %0: tensor<2xi32> } -// CHECK-LABEL: func @broadcast_sub -func @broadcast_sub(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi32> { - // CHECK-NEXT: "xla_hlo.subtract"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi32> - return %0: tensor<1x2xi32> -} - // CHECK-LABEL: func @shift_right func @shift_right(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { // CHECK: xla_hlo.shift_right_arithmetic %arg0, %arg1 : tensor<4xi32> @@ -553,13 +550,6 @@ func @shift_right(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { return %0 : tensor<4xi32> } -// CHECK-LABEL: func @broadcast_shift_right -func @broadcast_shift_right(%arg0: tensor<4xi32>, %arg1: tensor<2x4xi32>) -> tensor<2x4xi32> { - // CHECK: "xla_hlo.shift_right_arithmetic"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} - %0 = "tf.RightShift"(%arg0, %arg1) : (tensor<4xi32>, tensor<2x4xi32>) -> tensor<2x4xi32> - return %0 : tensor<2x4xi32> -} - // CHECK-LABEL: func @shift_right_unsigned func 
@shift_right_unsigned(%arg0: tensor<4xui8>, %arg1: tensor<4xui8>) -> tensor<4xui8> { // CHECK: tf.RightShift @@ -581,20 +571,6 @@ func @and(%arg0: tensor<2xi1>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @and_broadcast -func @and_broadcast(%arg0: tensor<1xi1>, %arg1: tensor<1x2xi1>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.and" - %0 = "tf.LogicalAnd"(%arg0, %arg1) : (tensor<1xi1>, tensor<1x2xi1>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - -// CHECK-LABEL: func @and_dynamic -func @and_dynamic(%arg0: tensor, %arg1: tensor<1xi1>) -> tensor { - // CHECK-NEXT: "xla_hlo.and" - %0 = "tf.LogicalAnd"(%arg0, %arg1) : (tensor, tensor<1xi1>) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @and_unranked func @and_unranked(%arg0: tensor<*xi1>, %arg1: tensor<*xi1>) -> tensor<*xi1> { // CHECK: tf.LogicalAnd @@ -609,20 +585,6 @@ func @or(%arg0: tensor<2xi1>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @or_broadcast -func @or_broadcast(%arg0: tensor<1xi1>, %arg1: tensor<1x2xi1>) -> tensor<1x2xi1> { - // CHECK-NEXT: xla_hlo.or - %0 = "tf.LogicalOr"(%arg0, %arg1) : (tensor<1xi1>, tensor<1x2xi1>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - -// CHECK-LABEL: func @or_dynamic -func @or_dynamic(%arg0: tensor, %arg1: tensor<1xi1>) -> tensor { - // CHECK-NEXT: xla_hlo.or - %0 = "tf.LogicalOr"(%arg0, %arg1) : (tensor, tensor<1xi1>) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @bitwise_or func @bitwise_or(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { // CHECK-NEXT: xla_hlo.or @@ -630,20 +592,6 @@ func @bitwise_or(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { return %0: tensor<4xi32> } -// CHECK-LABEL: func @bitwise_or_broadcast -func @bitwise_or_broadcast(%arg0: tensor<1xi8>, %arg1: tensor<1x4xi8>) -> tensor<1x4xi8> { - // CHECK-NEXT: xla_hlo.or - %0 = "tf.BitwiseOr"(%arg0, %arg1) : (tensor<1xi8>, tensor<1x4xi8>) -> tensor<1x4xi8> - return %0: tensor<1x4xi8> -} - -// CHECK-LABEL: func @bitwise_or_dynamic -func @bitwise_or_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor { - // CHECK-NEXT: xla_hlo.or - %0 = "tf.BitwiseOr"(%arg0, %arg1) : (tensor, tensor<1xi32>) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @bitwise_and func @bitwise_and(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { // CHECK-NEXT: xla_hlo.and @@ -651,20 +599,6 @@ func @bitwise_and(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<4xi32> { return %0: tensor<4xi32> } -// CHECK-LABEL: func @bitwise_and_broadcast -func @bitwise_and_broadcast(%arg0: tensor<1xi8>, %arg1: tensor<1x4xi8>) -> tensor<1x4xi8> { - // CHECK-NEXT: xla_hlo.and - %0 = "tf.BitwiseAnd"(%arg0, %arg1) : (tensor<1xi8>, tensor<1x4xi8>) -> tensor<1x4xi8> - return %0: tensor<1x4xi8> -} - -// CHECK-LABEL: func @bitwise_and_dynamic -func @bitwise_and_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor { - // CHECK-NEXT: xla_hlo.and - %0 = "tf.BitwiseAnd"(%arg0, %arg1) : (tensor, tensor<1xi32>) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @pow func @pow(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK-NEXT: xla_hlo.power @@ -672,13 +606,6 @@ func @pow(%arg0: tensor<2xf32>) -> tensor<2xf32> { return %0: tensor<2xf32> } -// CHECK-LABEL: func @pow_dynamic -func @pow_dynamic(%arg0: tensor) -> tensor { - // CHECK-NEXT: xla_hlo.power - %0 = "tf.Pow"(%arg0, %arg0) : (tensor, tensor) -> tensor - return %0: tensor -} - // CHECK-LABEL: func @diag_part // CHECK-SAME: %[[ARG:.*]]: tensor<4x3x4x3xf32> func @diag_part(%arg0: 
tensor<4x3x4x3xf32>) -> tensor<4x3xf32> { @@ -862,6 +789,8 @@ func @broadcast_to(%arg0: tensor<16xf32>) -> tensor<16x16x16x16xf32> { //===----------------------------------------------------------------------===// // Equality op legalizations. +// tf.Equal and tf.NotEqual expand from the same pattern. Full semantics are +// verified for tf.Equal and pattern application only for tf.NotEqual //===----------------------------------------------------------------------===// // CHECK-LABEL: func @equal @@ -873,14 +802,26 @@ func @equal(%arg0: tensor<2xi32>) -> tensor<2xi1> { // CHECK-LABEL: func @equal_dynamic func @equal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {comparison_direction = "EQ"} + // CHECK-DAG: %[[LHS_SHAPE:.+]] = shape.shape_of %arg0 + // CHECK-DAG: %[[RHS_SHAPE:.+]] = shape.const_shape [1] + // CHECK-DAG: %[[RESULT_SHAPE:.+]] = "shape.broadcast"(%[[LHS_SHAPE]], %[[RHS_SHAPE]]) + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} + // CHECK: "xla_hlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "EQ"} %0 = "tf.Equal"(%arg0, %arg1) : (tensor, tensor<1xi32>) -> tensor return %0: tensor } // CHECK-LABEL: func @equal_broadcast func @equal_broadcast(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "EQ"} + // CHECK-DAG: %[[LHS_SHAPE:.+]] = shape.const_shape [1] + // CHECK-DAG: %[[RHS_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK-DAG: %[[RESULT_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} + // CHECK: "xla_hlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "EQ"} %0 = "tf.Equal"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> return %0: tensor<1x2xi1> } @@ -927,70 +868,42 @@ func @notequal(%arg0: tensor<2xi32>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @notequal_dynamic -func @notequal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {comparison_direction = "NE"} - %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor, tensor<1xi32>) -> tensor - return %0: tensor -} - -// CHECK-LABEL: func @notequal_broadcast -func @notequal_broadcast(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "NE"} - %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - -// CHECK-LABEL: func @notequal_broadcast_no_incompatible_shapes_error -func @notequal_broadcast_no_incompatible_shapes_error(%arg0: tensor<2xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: 
"tf.NotEqual"(%arg0, %arg1) {incompatible_shape_error = false} - %0 = "tf.NotEqual"(%arg0, %arg1) {incompatible_shape_error = false} : (tensor<2xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - -// CHECK-LABEL: func @notequal_incompatible_shape_broadcastable -func @notequal_incompatible_shape_broadcastable(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor { - // CHECK-NEXT: "tf.NotEqual"(%arg0, %arg1) {incompatible_shape_error = false} - %0 = "tf.NotEqual"(%arg0, %arg1) { incompatible_shape_error = false } : (tensor, tensor<1xi32>) -> tensor - return %0: tensor -} - -// CHECK-LABEL: func @notequal_incompatible_shape_dynamic -func @notequal_incompatible_shape_dynamic(%arg0: tensor<2xi32>, %arg1: tensor) -> tensor<*xi1> { - // CHECK-NEXT: "tf.NotEqual"(%arg0, %arg1) {incompatible_shape_error = false} - %0 = "tf.NotEqual"(%arg0, %arg1) { incompatible_shape_error = false } : (tensor<2xi32>, tensor) -> tensor<*xi1> - return %0: tensor<*xi1> -} - -// CHECK-LABEL: func @notequal_incompatible_shape_both_dynamic -func @notequal_incompatible_shape_both_dynamic(%arg0: tensor, %arg1: tensor) -> tensor<*xi1> { - // CHECK-NEXT: "tf.NotEqual"(%arg0, %arg1) {incompatible_shape_error = false} - %0 = "tf.NotEqual"(%arg0, %arg1) { incompatible_shape_error = false } : (tensor, tensor) -> tensor<*xi1> - return %0: tensor<*xi1> -} - //===----------------------------------------------------------------------===// // Compare op legalizations. +// These expand from the same pattern. Full semantics are checked for +// tf.Greater. Others just check that the pattern applied. //===----------------------------------------------------------------------===// // CHECK-LABEL: func @greater func @greater(%arg0: tensor<2xi32>) -> tensor<2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "GT"} + // CHECK: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "GT"} %0 = "tf.Greater"(%arg0, %arg0) : (tensor<2xi32>, tensor<2xi32>) -> tensor<2xi1> return %0: tensor<2xi1> } // CHECK-LABEL: func @broadcast_greater func @broadcast_greater(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "GT"} + // CHECK-DAG: %[[LHS_SHAPE:.+]] = shape.const_shape [1] + // CHECK-DAG: %[[RHS_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK-DAG: %[[RESULT_SHAPE:.+]] = shape.const_shape [1, 2] + // CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} + // CHECK: "xla_hlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "GT"} %0 = "tf.Greater"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> return %0: tensor<1x2xi1> } // CHECK-LABEL: func @greater_dynamic -func @greater_dynamic(%arg0: tensor) -> tensor { - // CHECK: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "GT"} - %0 = "tf.Greater"(%arg0, %arg0) : (tensor, tensor) -> tensor +func @greater_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK-DAG: %[[LHS_SHAPE:.+]] = shape.shape_of %arg0 + // CHECK-DAG: %[[RHS_SHAPE:.+]] = shape.shape_of %arg1 + // CHECK-DAG: %[[RESULT_SHAPE:.+]] = "shape.broadcast"(%[[LHS_SHAPE]], %[[RHS_SHAPE]]) + // 
CHECK-DAG: %[[RESULT_EXTENTS:.+]] = "shape.to_extent_tensor"(%[[RESULT_SHAPE]]) + // CHECK-DAG: %[[LHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} + // CHECK-DAG: %[[RHS_BCAST:.+]] = "xla_hlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} + // CHECK: "xla_hlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "GT"} + %0 = "tf.Greater"(%arg0, %arg1) : (tensor, tensor) -> tensor return %0: tensor } @@ -1008,13 +921,6 @@ func @greater_equal(%arg0: tensor<2xi32>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @broadcast_greater_equal -func @broadcast_greater_equal(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "GE"} - %0 = "tf.GreaterEqual"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - // CHECK-LABEL: func @less func @less(%arg0: tensor<2xi32>) -> tensor<2xi1> { // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "LT"} @@ -1022,13 +928,6 @@ func @less(%arg0: tensor<2xi32>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @broadcast_less -func @broadcast_less(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "LT"} - %0 = "tf.Less"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - // CHECK-LABEL: func @less_equal func @less_equal(%arg0: tensor<2xi32>) -> tensor<2xi1> { // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg0) {comparison_direction = "LE"} @@ -1036,13 +935,6 @@ func @less_equal(%arg0: tensor<2xi32>) -> tensor<2xi1> { return %0: tensor<2xi1> } -// CHECK-LABEL: func @broadcast_less_equal -func @broadcast_less_equal(%arg0: tensor<1xi32>, %arg1: tensor<1x2xi32>) -> tensor<1x2xi1> { - // CHECK-NEXT: "xla_hlo.compare"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>, comparison_direction = "LE"} - %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1xi32>, tensor<1x2xi32>) -> tensor<1x2xi1> - return %0: tensor<1x2xi1> -} - //===----------------------------------------------------------------------===// // Complex op legalizations. @@ -1596,6 +1488,44 @@ func @unhandled_partitioned_call_2(%arg0: tensor, %arg1: tensor<*xi32>) -> return %0, %1 : tensor, tensor } + +//===----------------------------------------------------------------------===// +// ReverseV2 op legalization. 
+//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @reverse_func_32 +func @reverse_func_32(%arg0: tensor<5xi32>) -> tensor<5xi32> { + %axis = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> (tensor<1xi32>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<0> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5xi32>, tensor<1xi32>) -> tensor<5xi32> + + // CHECK: return [[VAL]] : tensor<5xi32> + return %reversed : tensor<5xi32> +} + +// CHECK-LABEL: @reverse_func_64 +func @reverse_func_64(%arg0: tensor<5xi32>) -> tensor<5xi32> { + %axis = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> (tensor<1xi64>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<0> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5xi32>, tensor<1xi64>) -> tensor<5xi32> + + // CHECK: return [[VAL]] : tensor<5xi32> + return %reversed : tensor<5xi32> +} + +// CHECK-LABEL: @reverse_func_neg +func @reverse_func_neg(%arg0: tensor<5x5xi32>) -> tensor<5x5xi32> { + %axis = "tf.Const"() {value = dense<[-1]> : tensor<1xi32>} : () -> (tensor<1xi32>) + + // CHECK: [[VAL:%.+]] = "xla_hlo.reverse"(%arg0) {dimensions = dense<1> : tensor<1xi64>} + %reversed = "tf.ReverseV2"(%arg0, %axis) : (tensor<5x5xi32>, tensor<1xi32>) -> tensor<5x5xi32> + + // CHECK: return [[VAL]] : tensor<5x5xi32> + return %reversed : tensor<5x5xi32> +} + //===----------------------------------------------------------------------===// // StatefulPartitionedCall op legalization. //===----------------------------------------------------------------------===// @@ -2205,13 +2135,6 @@ func @sin_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { return %0 : tensor<*xf32> } -// CHECK-LABEL: func @round -func @round(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK: "xla_hlo.round_nearest_afz"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> - %0 = "tf.Round"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> - return %0 : tensor<2xf32> -} - // CHECK-LABEL: func @rsqrt func @rsqrt(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: "xla_hlo.rsqrt"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> @@ -3720,11 +3643,11 @@ func @unsorted_segment_max(%data: tensor<8x?x64xf32>, %segment_ids : tensor, %arg1: tensor<16x5xi32>) -> tensor<16x2x5x3xf32> { - // CHECK: "xla_hlo.torch_index_select"(%arg0, %arg1) {batch_dims = 1 : i64, dim = 2 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>) -> tensor<16x2x5x3xf32> +func @gather_v2(%arg0: tensor<16x2x3xf32>, %arg1: tensor<16x5xi32>) -> tensor<16x2x5xf32> { + // CHECK: "xla_hlo.torch_index_select"(%arg0, %arg1) {batch_dims = 1 : i64, dim = 2 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>) -> tensor<16x2x5xf32> %0 = "tf.Const"() { value = dense<[-1]> : tensor<1xi32> } : () -> tensor<1xi32> - %1 = "tf.GatherV2"(%arg0, %arg1, %0) {batch_dims = -1 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>, tensor<1xi32>) -> tensor<16x2x5x3xf32> - return %1 : tensor<16x2x5x3xf32> + %1 = "tf.GatherV2"(%arg0, %arg1, %0) {batch_dims = -1 : i64} : (tensor<16x2x3xf32>, tensor<16x5xi32>, tensor<1xi32>) -> tensor<16x2x5xf32> + return %1 : tensor<16x2x5xf32> } // CHECK-LABEL: @gather_v2_dynamic @@ -4081,6 +4004,41 @@ func @xla_sharding(%arg0: tensor<4x16xf32>) -> tensor<4x16xf32> { return %0 : tensor<4x16xf32> } +// CHECK-LABEL: inplace_update_one +func @inplace_update_one(%arg0: tensor<8x4xf32>, %arg1: tensor<1x4xf32>, %arg2: tensor<1xi32>) -> tensor<8x4xf32> { + // CHECK-DAG: [[CST:%.+]] = xla_hlo.constant dense<0> + // CHECK-DAG: 
[[SLICE1:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE2:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[1, 4]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + // CHECK-DAG: [[RESHAPE1:%.+]] = "xla_hlo.reshape"([[SLICE1]]) + // CHECK-DAG: [[UPDATE:%.+]] = "xla_hlo.dynamic-update-slice"(%arg0, [[SLICE2]], [[RESHAPE1]], [[CST]]) + %0 = "tf.InplaceUpdate"(%arg0, %arg2, %arg1) : (tensor<8x4xf32>, tensor<1xi32>, tensor<1x4xf32>) -> tensor<8x4xf32> + + // CHECK: return [[UPDATE]] + return %0 : tensor<8x4xf32> +} + +// CHECK-LABEL: inplace_update_three +func @inplace_update_three(%arg0: tensor<8x8x4xf32>, %arg1: tensor<3x8x4xf32>, %arg2: tensor<3xi32>) -> tensor<8x8x4xf32> { + // CHECK-DAG: [[CST:%.+]] = xla_hlo.constant dense<0> + // CHECK-DAG: [[SLICE1:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE2:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<2> : tensor<1xi64>, start_indices = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE3:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<3> : tensor<1xi64>, start_indices = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[SLICE4:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[1, 8, 4]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + // CHECK-DAG: [[SLICE5:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[2, 8, 4]> : tensor<3xi64>, start_indices = dense<[1, 0, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + // CHECK-DAG: [[SLICE6:%.+]] = "xla_hlo.slice"(%arg1) {limit_indices = dense<[3, 8, 4]> : tensor<3xi64>, start_indices = dense<[2, 0, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + // CHECK-DAG: [[RESHAPE1:%.+]] = "xla_hlo.reshape"([[SLICE1]]) + // CHECK-DAG: [[RESHAPE2:%.+]] = "xla_hlo.reshape"([[SLICE2]]) + // CHECK-DAG: [[RESHAPE3:%.+]] = "xla_hlo.reshape"([[SLICE3]]) + // CHECK-DAG: [[UPDATE1:%.+]] = "xla_hlo.dynamic-update-slice"(%arg0, [[SLICE4]], [[RESHAPE1]], [[CST]], [[CST]]) + // CHECK-DAG: [[UPDATE2:%.+]] = "xla_hlo.dynamic-update-slice"([[UPDATE1]], [[SLICE5]], [[RESHAPE2]], [[CST]], [[CST]]) + // CHECK-DAG: [[UPDATE3:%.+]] = "xla_hlo.dynamic-update-slice"([[UPDATE2]], [[SLICE6]], [[RESHAPE3]], [[CST]], [[CST]]) + %0 = "tf.InplaceUpdate"(%arg0, %arg2, %arg1) : (tensor<8x8x4xf32>, tensor<3xi32>, tensor<3x8x4xf32>) -> tensor<8x8x4xf32> + + // CHECK: return [[UPDATE3]] : tensor<8x8x4xf32> + return %0 : tensor<8x8x4xf32> +} + + // CHECK-LABEL: xla_dynamic_update_slice func @xla_dynamic_update_slice(%arg0: tensor<4x16xf32>, %arg1: tensor<2x4xf32>, %arg2: tensor<2xi32>) -> tensor<4x16xf32> { // CHECK: [[SLICE0:%.+]] = "xla_hlo.slice"(%arg2) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} : (tensor<2xi32>) -> tensor<1xi32> @@ -4103,6 +4061,21 @@ func @xla_dynamic_update_slice2(%arg0: tensor<4xf32>, %arg1: tensor<2xf32>, %arg return %0 : tensor<4xf32> } +//===----------------------------------------------------------------------===// +// AllToAll op legalizations. 
+//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @alltoall_basic +func @alltoall_basic(%input: tensor<10xf32>) -> tensor<10xf32> { + %group_assignment = "tf.Const" () { + value = dense<[[0, 2, 4, 6], [1, 3, 5, 7], [3, 5, 6, 8]]> : tensor<3x4xi32> + } : () -> tensor<3x4xi32> + %result = "tf.AllToAll"(%input, %group_assignment) {T = f32, concat_dimension = 1 : i64, split_count = 2 : i64, split_dimension = 0 : i64} : (tensor<10xf32>, tensor<3x4xi32>) -> tensor<10xf32> + // CHECK: xla_hlo.all_to_all + // CHECK-SAME: replica_groups = dense<{{\[}}[0, 2, 4, 6], [1, 3, 5, 7], [3, 5, 6, 8]]> : tensor<3x4xi64> + return %result : tensor<10xf32> +} + //===----------------------------------------------------------------------===// // Cumsum op legalizations. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir index 013748fea28..99b1766e73c 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir @@ -24,9 +24,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>, // CHECK-LABEL: func @fusion // CHECK: %[[C1:.*]] = constant 1 // CHECK-NOT: linalg.generic -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK-NOT: loop.for +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: addf // CHECK: linalg.generic @@ -36,9 +36,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>, // TILED-DAG: %[[C2:.*]] = constant 2 // TILED-DAG: %[[C3:.*]] = constant 3 // TILED-NOT: linalg.generic -// TILED: loop.for {{.*}} step %[[C2]] -// TILED: loop.for {{.*}} step %[[C3]] -// TILED-NOT: loop.for +// TILED: scf.for {{.*}} step %[[C2]] +// TILED: scf.for {{.*}} step %[[C3]] +// TILED-NOT: scf.for // TILED: linalg.generic // TILED: addf // TILED: linalg.generic @@ -46,8 +46,8 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>, // PLOOP-LABEL: func @fusion // PLOOP-NOT: linalg.generic -// PLOOP: loop.parallel -// PLOOP-NOT: loop.parallel +// PLOOP: scf.parallel +// PLOOP-NOT: scf.parallel // PLOOP: linalg.generic // PLOOP: addf // PLOOP: linalg.generic @@ -94,9 +94,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>, // CHECK-LABEL: func @fusion // CHECK: %[[C1:.*]] = constant 1 // CHECK-NOT: linalg.generic -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK-NOT: loop.for +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: linalg.generic // CHECK: subf @@ -107,9 +107,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>, // TILED-DAG: %[[C2:.*]] = constant 2 // TILED-DAG: %[[C3:.*]] = constant 3 // TILED-NOT: linalg.generic -// TILED: loop.for {{.*}} step %[[C2]] -// TILED: loop.for {{.*}} step %[[C3]] -// TILED-NOT: loop.for +// TILED: scf.for {{.*}} step %[[C2]] +// TILED: scf.for {{.*}} step %[[C3]] +// TILED-NOT: scf.for // TILED: linalg.generic // TILED: linalg.generic // TILED: subf @@ -118,8 +118,8 @@ func @fusion_of_three(%arg0: memref<100x10xf32>, // PLOOP-LABEL: func @fusion_of_three // PLOOP-NOT: linalg.generic -// PLOOP: loop.parallel -// PLOOP-NOT: loop.parallel +// PLOOP: scf.parallel +// PLOOP-NOT: scf.parallel // PLOOP: 
linalg.generic // PLOOP: linalg.generic // PLOOP: subf @@ -147,11 +147,11 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32 // CHECK-LABEL: func @fusion_4d // CHECK: %[[C1:.*]] = constant 1 // CHECK-NOT: linalg.generic -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK: loop.for {{.*}} step %[[C1]] -// CHECK-NOT: loop.for +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK: scf.for {{.*}} step %[[C1]] +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: addf // CHECK: linalg.generic @@ -161,9 +161,9 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32 // TILED-DAG: %[[C2:.*]] = constant 2 // TILED-DAG: %[[C3:.*]] = constant 3 // TILED-NOT: linalg.generic -// TILED: loop.for {{.*}} step %[[C2]] -// TILED: loop.for {{.*}} step %[[C3]] -// TILED-NOT: loop.for +// TILED: scf.for {{.*}} step %[[C2]] +// TILED: scf.for {{.*}} step %[[C3]] +// TILED-NOT: scf.for // TILED: linalg.generic // TILED: addf // TILED: linalg.generic @@ -171,8 +171,8 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32 // PLOOP-LABEL: func @fusion_4d // PLOOP-NOT: linalg.generic -// PLOOP: loop.parallel -// PLOOP-NOT: loop.parallel +// PLOOP: scf.parallel +// PLOOP-NOT: scf.parallel // PLOOP: linalg.generic // PLOOP: addf // PLOOP: linalg.generic diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir index 5b763cde2ed..c640b395f4d 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir @@ -50,19 +50,19 @@ func @select_and_scatter(%arg: memref<112x112xf32>, // Parallel loop to initialize the output buffer. // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref -// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) +// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C112]], [[C112]]) step ([[C1]], [[C1]]) { // CHECK: store [[INIT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]] -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: } // Parallel loop over source buffer to compute scattered values. -// CHECK: loop.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]]) +// CHECK: scf.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) { // Window loop w.r.t. first dim. // CHECK: [[SEL_RES_I:%.*]]:4 -// CHECK-SAME: = loop.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]] +// CHECK-SAME: = scf.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]] // CHECK-SAME: iter_args( // CHECK-SAME: [[SEL_I_0:%.*]] = [[C0]], [[SEL_J_0:%.*]] = [[C0]], // CHECK-SAME: [[SEL_VAL_0:%.*]] = [[C0_F32]], @@ -71,7 +71,7 @@ func @select_and_scatter(%arg: memref<112x112xf32>, // Window loop w.r.t. second dim. // CHECK: [[SEL_RES_J:%.*]]:4 -// CHECK-SAME: = loop.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]] +// CHECK-SAME: = scf.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]] // CHECK-SAME: iter_args( // CHECK-SAME: [[SEL_I:%.*]] = [[SEL_I_0]], [[SEL_J:%.*]] = [[SEL_J_0]], // CHECK-SAME: [[SEL_VAL:%.*]] = [[SEL_VAL_0]], @@ -102,14 +102,14 @@ func @select_and_scatter(%arg: memref<112x112xf32>, // be applied, current selected ivs (SEL_I, SEL_J) and value (SEL_VAL) are // returned in that case. 
// CHECK: [[IF_INBOUNDS_RES:%.*]]:4 -// CHECK-SAME: = loop.if [[INBOUNDS_1]] -> (index, index, f32, i1) { +// CHECK-SAME: = scf.if [[INBOUNDS_1]] -> (index, index, f32, i1) { // INBOUNDS-THEN-BODY, i.e. if INBOUNDS == true // CHECK: [[ARG_ELEM:%.*]] = load [[ARG_BUF]]{{\[}}[[ARG_I]], [[ARG_J]]] // CHECK: [[IF_INIT_RES:%.*]]:4 - // CHECK-SAME: = loop.if [[SEL_INIT]] -> (index, index, f32, i1) { + // CHECK-SAME: = scf.if [[SEL_INIT]] -> (index, index, f32, i1) { // INIT-THEN-BODY, i.e. INBOUNDS == true and INIT = true @@ -133,40 +133,40 @@ func @select_and_scatter(%arg: memref<112x112xf32>, // Depending on PRED, return ARG ivs & elem or current select ivs and value. - // CHECK: [[IF_PRED_RES:%.*]]:4 = loop.if [[PRED]] - // CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]] + // CHECK: [[IF_PRED_RES:%.*]]:4 = scf.if [[PRED]] + // CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]] // CHECK: } else { - // CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]] + // CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]] // CHECK: } // INIT-THEN-BODY yield. - // CHECK: loop.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1, + // CHECK: scf.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1, // CHECK-SAME: [[IF_PRED_RES]]#2, [[IF_PRED_RES]]#3 // INIT-ELSE-BODY, i.e. if INBOUNDS == TRUE and INIT == FALSE, returns ARG // ivs and element without computing Select function. - // CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], + // CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], // CHECK-SAME: [[CTRUE]] : index, index, f32, i1 // CHECK: } // INBOUNDS-THEN-BODY yield. - // CHECK: loop.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2, + // CHECK: scf.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2, // CHECK-SAME: [[IF_INIT_RES]]#3 : index, index, f32, i1 // CHECK: } // INBOUNDS-ELSE-REGION, i.e. if INBOUNDS == FALSE // We are in the pad area, return current iter_args. - // CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], + // CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], // CHECK-SAME: [[SEL_INIT]] : index, index, f32, i1 // CHECK: } // Window loop w.r.t. second dim yield. -// CHECK: loop.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1, +// CHECK: scf.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1, // CHECK-SAME: [[IF_INBOUNDS_RES]]#2, [[IF_INBOUNDS_RES]]#3 // CHECK: } // Window loop w.r.t. first dim yield. 
-// CHECK: loop.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2, +// CHECK: scf.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2, // CHECK-SAME: [[SEL_RES_J]]#3 : index, index, f32, i1 // CHECK: } @@ -196,4 +196,4 @@ func @select_and_scatter(%arg: memref<112x112xf32>, // CHECK: atomic_yield [[RES]] : f32 // Parallel loop over source buffer yield -// CHECK: loop.yield +// CHECK: scf.yield diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir index 4d878cee6f4..16ffbf241b0 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir @@ -22,7 +22,7 @@ func @reduce(%arg: memref<100x10xf32>, // CHECK-DAG: %[[LB:.*]] = constant 0 : index // CHECK-DAG: %[[UB:.*]] = constant 10 : index // CHECK-DAG: %[[STEP:.*]] = constant 1 : index -// CHECK: loop.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: scf.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { // CHECK: %[[LHS:.*]] = linalg.slice %[[ARG2]][%[[IDX]]] : memref<100xf32>, index, memref // CHECK: %[[RHS:.*]] = linalg.slice %[[ARG0]][%[[IDX]], %[[IDX1]]] : memref<100x10xf32>, index, index, memref // CHECK: "xla_lhlo.add"(%[[LHS]], %[[RHS]], %[[LHS]]) : (memref, memref, memref) -> () diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir index b80d5ba6755..bb8010b520c 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir @@ -3,7 +3,7 @@ // CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @element_wise func @element_wise(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, - %result: memref<2x2xf32>) { + %result: memref<2x2xf32>) { "xla_lhlo.add"(%lhs, %rhs, %result) : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () return @@ -16,8 +16,9 @@ func @element_wise(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, // ----- // CHECK-LABEL: func @element_wise_with_dynamic_shape -func @element_wise_with_dynamic_shape(%lhs: memref, %rhs: memref, - %result: memref) { +func @element_wise_with_dynamic_shape(%lhs: memref, + %rhs: memref, + %result: memref) { "xla_lhlo.add"(%lhs, %rhs, %result) : (memref, memref, memref) -> () return @@ -31,22 +32,22 @@ func @element_wise_with_dynamic_shape(%lhs: memref, %rhs: memref, %rhs: memref, - %result: memref) { + %result: memref) { + "xla_lhlo.add"(%lhs, %rhs, %result) + : (memref, memref, memref) -> () + return +} // CHECK: %[[LHS:.*]] = load // CHECK: %[[RHS:.*]] = load // CHECK: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] // CHECK: store %[[RES]] // CHECK-NEXT: return - "xla_lhlo.add"(%lhs, %rhs, %result) - : (memref, memref, memref) -> () - return -} // ----- // CHECK-LABEL: func @minf func @minf(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, - %result: memref<2x2xf32>) { + %result: memref<2x2xf32>) { "xla_lhlo.minimum"(%lhs, %rhs, %result) : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () return @@ -61,7 +62,7 @@ func @minf(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, // CHECK-LABEL: func @maxi func @maxi(%lhs: memref<2x2xi32>, %rhs: memref<2x2xi32>, - %result: memref<2x2xi32>) { + %result: memref<2x2xi32>) { "xla_lhlo.maximum"(%lhs, %rhs, %result) : (memref<2x2xi32>, memref<2x2xi32>, memref<2x2xi32>) -> () return @@ -89,8 +90,7 @@ func @and(%lhs: memref<2x2xi32>, %rhs: memref<2x2xi32>, // ----- // CHECK-LABEL: func @exp 
-func @exp(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { +func @exp(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { "xla_lhlo.exponential"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return @@ -103,10 +103,8 @@ func @exp(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @log -func @log(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.log"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @log(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.log"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -117,10 +115,8 @@ func @log(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @copy -func @copy(%input: memref<2x4x8xf32>, - %result: memref<2x4x8xf32>) { - "xla_lhlo.copy"(%input, %result) - : (memref<2x4x8xf32>, memref<2x4x8xf32>) -> () +func @copy(%in: memref<2x4x8xf32>, %out: memref<2x4x8xf32>) { + "xla_lhlo.copy"(%in, %out) : (memref<2x4x8xf32>, memref<2x4x8xf32>) -> () return } // CHECK: linalg.generic @@ -131,7 +127,7 @@ func @copy(%input: memref<2x4x8xf32>, // CHECK-LABEL: func @float_cmp func @float_cmp(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, - %result: memref<2x2xi1>) { + %result: memref<2x2xi1>) { "xla_lhlo.compare"(%lhs, %rhs, %result) {comparison_direction = "EQ"} : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xi1>) -> () return @@ -146,7 +142,8 @@ func @float_cmp(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, // CHECK-LABEL: func @int_cmp func @int_cmp(%lhs: memref<2x2xi32>, %rhs: memref<2x2xi32>, %result: memref<2x2xi1>) { - "xla_lhlo.compare"(%lhs, %rhs, %result) {comparison_direction = "LT"} : (memref<2x2xi32>, memref<2x2xi32>, memref<2x2xi1>) -> () + "xla_lhlo.compare"(%lhs, %rhs, %result) {comparison_direction = "LT"} + : (memref<2x2xi32>, memref<2x2xi32>, memref<2x2xi1>) -> () return } // CHECK: linalg.generic @@ -157,10 +154,10 @@ func @int_cmp(%lhs: memref<2x2xi32>, %rhs: memref<2x2xi32>, // ----- // CHECK-LABEL: func @select -func @select(%pred: memref<2x2xi1>, %lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, - %result: memref<2x2xf32>) { +func @select(%pred: memref<2x2xi1>, %lhs: memref<2x2xf32>, + %rhs: memref<2x2xf32>, %result: memref<2x2xf32>) { "xla_lhlo.select"(%pred, %lhs, %rhs, %result) - : (memref<2x2xi1>, memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () + : (memref<2x2xi1>, memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -184,20 +181,13 @@ func @iota(%out: memref<7x10xf32>) { // ----- -// CHECK: #[[RESULT_MAP:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK-LABEL: func @iota -func @iota(%out: memref<7x10xi64>) { - "xla_lhlo.iota"(%out) {iota_dimension = 1 : i64} : (memref<7x10xi64>) -> () - return -} - -// ----- - // CHECK-DAG: #[[OPERAND_MAP:.+]] = affine_map<(d0, d1, d2) -> ()> // CHECK-DAG: #[[RESULT_MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-LABEL: func @broadcast_scalar func @broadcast_scalar(%operand: memref, %result: memref<4x2x1xf32>) { - "xla_lhlo.broadcast"(%operand, %result) {broadcast_sizes = dense<[4, 2, 1]> : tensor<3xi64>} : (memref, memref<4x2x1xf32>) -> () + "xla_lhlo.broadcast"(%operand, %result) { + broadcast_sizes = dense<[4, 2, 1]> : tensor<3xi64> + } : (memref, memref<4x2x1xf32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -209,8 +199,11 @@ func @broadcast_scalar(%operand: memref, %result: memref<4x2x1xf32>) { // CHECK-DAG: #[[OPERAND_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, 
d5) -> (d3, d4, d5)> // CHECK-DAG: #[[RESULT_MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> // CHECK-LABEL: func @broadcast -func @broadcast(%operand: memref<4x?x16xf32>, %result: memref<4x2x1x4x?x16xf32>) { - "xla_lhlo.broadcast"(%operand, %result) {broadcast_sizes = dense<[4, 2, 1]> : tensor<3xi64>} : (memref<4x?x16xf32>, memref<4x2x1x4x?x16xf32>) -> () +func @broadcast(%operand: memref<4x?x16xf32>, + %result: memref<4x2x1x4x?x16xf32>) { + "xla_lhlo.broadcast"(%operand, %result) { + broadcast_sizes = dense<[4, 2, 1]> : tensor<3xi64> + } : (memref<4x?x16xf32>, memref<4x2x1x4x?x16xf32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -221,12 +214,12 @@ func @broadcast(%operand: memref<4x?x16xf32>, %result: memref<4x2x1x4x?x16xf32>) // CHECK-DAG: #[[OPERAND_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d4, d0, d2)> // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK-LABEL: func @dynamic_broadcast -func @dynamic_broadcast(%operand: memref, - %result: memref) { - "xla_lhlo.broadcast_in_dim"(%operand, %result) - {broadcast_dimensions = dense<[4,0,2]> : tensor<3xi64>} - : (memref, memref) -> () +// CHECK-LABEL: func @dynamic_broadcast_in_dim +func @dynamic_broadcast_in_dim(%operand: memref, + %result: memref) { + "xla_lhlo.broadcast_in_dim"(%operand, %result) { + broadcast_dimensions = dense<[4,0,2]> : tensor<3xi64> + } : (memref, memref) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -237,11 +230,12 @@ func @dynamic_broadcast(%operand: memref, // CHECK-DAG: #[[OPERAND_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d4, d0, 0)> // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)> -// CHECK-LABEL: func @broadcast -func @broadcast(%operand: memref<5x7x1xf32>, %result: memref<7x10x6x4x5xf32>) { - "xla_lhlo.broadcast_in_dim"(%operand, %result) - {broadcast_dimensions = dense<[4,0,2]> : tensor<3xi64>} - : (memref<5x7x1xf32>, memref<7x10x6x4x5xf32>) -> () +// CHECK-LABEL: func @broadcast_in_dim_with_expansion +func @broadcast_in_dim_with_expansion(%operand: memref<5x7x1xf32>, + %result: memref<7x10x6x4x5xf32>) { + "xla_lhlo.broadcast_in_dim"(%operand, %result) { + broadcast_dimensions = dense<[4,0,2]> : tensor<3xi64> + } : (memref<5x7x1xf32>, memref<7x10x6x4x5xf32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -252,11 +246,12 @@ func @broadcast(%operand: memref<5x7x1xf32>, %result: memref<7x10x6x4x5xf32>) { // CHECK-DAG: #[[RESULT_MAP_0:.*]] = affine_map<(d0, d1, d2) -> ()> // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -// CHECK-LABEL: func @broadcast_scalar -func @broadcast_scalar(%operand: memref, %result: memref<7x10x6xf32>) { - "xla_lhlo.broadcast_in_dim"(%operand, %result) - {broadcast_dimensions = dense<[]> : tensor<0xi64>} - : (memref, memref<7x10x6xf32>) -> () +// CHECK-LABEL: func @broadcast_in_dim_scalar +func @broadcast_in_dim_scalar(%operand: memref, + %result: memref<7x10x6xf32>) { + "xla_lhlo.broadcast_in_dim"(%operand, %result) { + broadcast_dimensions = dense<[]> : tensor<0xi64> + } : (memref, memref<7x10x6xf32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[RESULT_MAP_0]], #[[RESULT_MAP]]] @@ -265,9 +260,26 @@ func @broadcast_scalar(%operand: memref, %result: memref<7x10x6xf32>) { // ----- +// CHECK-DAG: #[[OPERAND_MAP:.+]] = affine_map<(d0, d1) -> (d0)> 
+// CHECK-DAG: #[[RESULT_MAP:.+]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-LABEL: func @broadcast_in_dim_with_one_to_one +func @broadcast_in_dim_with_one_to_one(%operand: memref<1xf32>, %result: memref<1x5xf32>) { + "xla_lhlo.broadcast_in_dim"(%operand, %result) { + broadcast_dimensions = dense<[0]> : tensor<1xi64> + } : (memref<1xf32>, memref<1x5xf32>) -> () + return +} +// CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] +// CHECK-NEXT: ^bb0(%[[OPERAND:.+]]: f32, %{{.+}}: f32): +// CHECK-NEXT: linalg.yield %[[OPERAND]] : f32 + +// ----- + // CHECK-LABEL: func @constant func @constant(%value: memref) { - "xla_lhlo.constant"(%value) {value = dense<10> : tensor} : (memref) -> () + "xla_lhlo.constant"(%value) { + value = dense<10> : tensor + } : (memref) -> () return } // CHECK: %[[CONSTANT:.*]] = constant 10 : i32 @@ -275,11 +287,9 @@ func @constant(%value: memref) { // ----- -// CHECK-LABEL: func @abs -func @abs(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.abs"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +// CHECK-LABEL: func @absf +func @absf(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.abs"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -289,10 +299,10 @@ func @abs(%input: memref<2x2xf32>, // ----- -func @abs(%input: memref<2x2xi32>, +// CHECK-LABEL: func @absi +func @absi(%input: memref<2x2xi32>, %result: memref<2x2xi32>) { - "xla_lhlo.abs"(%input, %result) - : (memref<2x2xi32>, memref<2x2xi32>) -> () + "xla_lhlo.abs"(%input, %result) : (memref<2x2xi32>, memref<2x2xi32>) -> () return } @@ -307,10 +317,8 @@ func @abs(%input: memref<2x2xi32>, // ----- // CHECK-LABEL: func @ceil -func @ceil(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.ceil"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @ceil(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.ceil"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -321,10 +329,8 @@ func @ceil(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @convert_i32_to_f32 -func @convert_i32_to_f32(%input: memref<2x2xi32>, - %result: memref<2x2xf32>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xi32>, memref<2x2xf32>) -> () +func @convert_i32_to_f32(%input: memref<2x2xi32>, %result: memref<2x2xf32>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xi32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -337,8 +343,7 @@ func @convert_i32_to_f32(%input: memref<2x2xi32>, // CHECK-LABEL: func @convert_i16_to_i32 func @convert_i16_to_i32(%input: memref<2x2xi16>, %result: memref<2x2xi32>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xi16>, memref<2x2xi32>) -> () + "xla_lhlo.convert"(%input, %result) : (memref<2x2xi16>, memref<2x2xi32>) -> () return } // CHECK: linalg.generic @@ -349,10 +354,8 @@ func @convert_i16_to_i32(%input: memref<2x2xi16>, // ----- // CHECK-LABEL: func @convert_i32_to_i16 -func @convert_i32_to_i16(%input: memref<2x2xi32>, - %result: memref<2x2xi16>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xi32>, memref<2x2xi16>) -> () +func @convert_i32_to_i16(%input: memref<2x2xi32>, %result: memref<2x2xi16>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xi32>, memref<2x2xi16>) -> () return } // CHECK: linalg.generic @@ -363,10 +366,8 @@ func @convert_i32_to_i16(%input: memref<2x2xi32>, // ----- // CHECK-LABEL: func @convert_f32_to_f64 -func 
@convert_f32_to_f64(%input: memref<2x2xf32>, - %result: memref<2x2xf64>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf64>) -> () +func @convert_f32_to_f64(%input: memref<2x2xf32>, %result: memref<2x2xf64>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xf32>, memref<2x2xf64>) -> () return } // CHECK: linalg.generic @@ -377,10 +378,8 @@ func @convert_f32_to_f64(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @convert_f64_to_f32 -func @convert_f64_to_f32(%input: memref<2x2xf64>, - %result: memref<2x2xf32>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xf64>, memref<2x2xf32>) -> () +func @convert_f64_to_f32(%input: memref<2x2xf64>, %result: memref<2x2xf32>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xf64>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -391,10 +390,8 @@ func @convert_f64_to_f32(%input: memref<2x2xf64>, // ----- // CHECK-LABEL: func @convert_i32_to_i32 -func @convert_i32_to_i32(%input: memref<2x2xi32>, - %result: memref<2x2xi32>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xi32>, memref<2x2xi32>) -> () +func @convert_i32_to_i32(%input: memref<2x2xi32>, %result: memref<2x2xi32>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xi32>, memref<2x2xi32>) -> () return } // CHECK: linalg.generic @@ -404,10 +401,8 @@ func @convert_i32_to_i32(%input: memref<2x2xi32>, // ----- // CHECK-LABEL: func @convert_f32_to_f32 -func @convert_f32_to_f32(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.convert"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @convert_f32_to_f32(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.convert"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -416,11 +411,22 @@ func @convert_f32_to_f32(%input: memref<2x2xf32>, // ----- +// CHECK-LABEL: func @convert_f32_to_i32 +func @convert_f32_to_i32(%input: memref<2x2xf32>, %result: memref<2x2xi32>) { + "xla_lhlo.convert"(%input, %result) + : (memref<2x2xf32>, memref<2x2xi32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32, %[[RESULT_OUT:.*]]: i32): +// CHECK-NEXT: %[[RESULT:.*]] = fptosi %[[OPERAND_IN]] : f32 to i32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : i32 + +// ----- + // CHECK-LABEL: func @cos -func @cos(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.cosine"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @cos(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.cosine"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -430,28 +436,37 @@ func @cos(%input: memref<2x2xf32>, // ----- -// CHECK-LABEL: func @neg -func @neg(%input: memref<2x2xf32>, +// CHECK-LABEL: func @sin +func @sin(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { - "xla_lhlo.negate"(%input, %result) + "xla_lhlo.sine"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic // CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32, %[[RESULT_OUT:.*]]): +// CHECK-NEXT: %[[RESULT:.*]] = sin %[[OPERAND_IN]] : f32 +// CHECK-NEXT: linalg.yield %[[RESULT]] : f32 + +// ----- + +// CHECK-LABEL: func @negf +func @negf(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.negate"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: f32, %[[RESULT_OUT:.*]]): // CHECK-NEXT: %[[RESULT:.*]] = negf %[[OPERAND_IN]] : 
f32 // CHECK-NEXT: linalg.yield %[[RESULT]] : f32 // ----- -// CHECK-LABEL: func @neg -func @neg(%input: memref<2x2xi32>, - %result: memref<2x2xi32>) { - "xla_lhlo.negate"(%input, %result) - : (memref<2x2xi32>, memref<2x2xi32>) -> () +// CHECK-LABEL: func @negi +func @negi(%input: memref<2x2xi32>, %result: memref<2x2xi32>) { + "xla_lhlo.negate"(%input, %result) : (memref<2x2xi32>, memref<2x2xi32>) -> () return } - // CHECK: linalg.generic // CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: i32, %[[RESULT_OUT:.*]]): // CHECK-NEXT: %[[L0:.*]] = constant 0 : i32 @@ -462,7 +477,7 @@ func @neg(%input: memref<2x2xi32>, // CHECK-LABEL: func @rem func @remainder(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, - %result: memref<2x2xf32>) { + %result: memref<2x2xf32>) { "xla_lhlo.remainder"(%lhs, %rhs, %result) : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () return @@ -475,10 +490,8 @@ func @remainder(%lhs: memref<2x2xf32>, %rhs: memref<2x2xf32>, // ----- // CHECK-LABEL: func @rsqrt -func @rsqrt(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.rsqrt"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @rsqrt(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.rsqrt"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -489,10 +502,8 @@ func @rsqrt(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @sign -func @sign(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.sign"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @sign(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.sign"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -504,10 +515,8 @@ func @sign(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @sqrt -func @sqrt(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.sqrt"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @sqrt(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.sqrt"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -518,10 +527,8 @@ func @sqrt(%input: memref<2x2xf32>, // ----- // CHECK-LABEL: func @tanh -func @tanh(%input: memref<2x2xf32>, - %result: memref<2x2xf32>) { - "xla_lhlo.tanh"(%input, %result) - : (memref<2x2xf32>, memref<2x2xf32>) -> () +func @tanh(%input: memref<2x2xf32>, %result: memref<2x2xf32>) { + "xla_lhlo.tanh"(%input, %result) : (memref<2x2xf32>, memref<2x2xf32>) -> () return } // CHECK: linalg.generic @@ -529,6 +536,48 @@ func @tanh(%input: memref<2x2xf32>, // CHECK-NEXT: %[[RESULT:.*]] = tanh %[[OPERAND_IN]] : f32 // CHECK-NEXT: linalg.yield %[[RESULT]] : f32 +// ----- + +// CHECK-LABEL: func @complex +func @complex(%real: memref<2x2xf32>, + %imag: memref<2x2xf32>, + %cplx: memref<2x2xcomplex<f32>>) { + "xla_lhlo.complex"(%real, %imag, %cplx) + : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xcomplex<f32>>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[RE:.*]]: f32, %[[IM:.*]]: f32, %[[CP:.*]]: complex<f32>): +// CHECK-NEXT: %[[RESULT:.*]] = create_complex %[[RE]], %[[IM]] : complex<f32> +// CHECK-NEXT: linalg.yield %[[RESULT]] : complex<f32> + +// ----- + +// CHECK-LABEL: func @real +func @real(%cplx: memref<2x2xcomplex<f32>>, + %real: memref<2x2xf32>) { + "xla_lhlo.real"(%cplx, %real) + : (memref<2x2xcomplex<f32>>, memref<2x2xf32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[CPLX_IN:.*]]: complex<f32>, %[[REAL_OUT:.*]]: f32): +// CHECK-NEXT: %[[REAL:.*]] = re 
%[[CPLX_IN:.*]] : complex<f32> +// CHECK-NEXT: linalg.yield %[[REAL]] : f32 + +// ----- + +// CHECK-LABEL: func @imag +func @imag(%cplx: memref<2x2xcomplex<f32>>, + %imag: memref<2x2xf32>) { + "xla_lhlo.imag"(%cplx, %imag) + : (memref<2x2xcomplex<f32>>, memref<2x2xf32>) -> () + return +} +// CHECK: linalg.generic +// CHECK-NEXT: ^bb0(%[[CPLX_IN:.*]]: complex<f32>, %[[IMAG_OUT:.*]]: f32): +// CHECK-NEXT: %[[IMAG:.*]] = im %[[CPLX_IN:.*]] : complex<f32> +// CHECK-NEXT: linalg.yield %[[IMAG]] : f32 // ----- @@ -558,7 +607,8 @@ func @slice(%operand: memref<?x?xf32>, %result: memref<?x?xf32>) { // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @reshape_3D_2D func @reshape_3D_2D(%arg0: memref<12x1x42xi32>, %arg1 : memref<12x42xi32>) { - "xla_lhlo.reshape"(%arg0, %arg1) : (memref<12x1x42xi32>, memref<12x42xi32>) -> () + "xla_lhlo.reshape"(%arg0, %arg1) + : (memref<12x1x42xi32>, memref<12x42xi32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -569,7 +619,8 @@ func @reshape_3D_2D(%arg0: memref<12x1x42xi32>, %arg1 : memref<12x42xi32>) { // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @reshape_4D_2D func @reshape_4D_2D(%arg0: memref<12x42x1x1xi32>, %arg1 : memref<12x42xi32>) { - "xla_lhlo.reshape"(%arg0, %arg1) : (memref<12x42x1x1xi32>, memref<12x42xi32>) -> () + "xla_lhlo.reshape"(%arg0, %arg1) + : (memref<12x42x1x1xi32>, memref<12x42xi32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] @@ -580,7 +631,8 @@ func @reshape_4D_2D(%arg0: memref<12x42x1x1xi32>, %arg1 : memref<12x42xi32>) { // CHECK-DAG: #[[RESULT_MAP:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> // CHECK-LABEL: func @reshape_2D_4D func @reshape_2D_4D(%arg0: memref<12x42xi32>, %arg1 : memref<12x1x42x1xi32>) { - "xla_lhlo.reshape"(%arg0, %arg1) : (memref<12x42xi32>, memref<12x1x42x1xi32>) -> () + "xla_lhlo.reshape"(%arg0, %arg1) + : (memref<12x42xi32>, memref<12x1x42x1xi32>) -> () return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir index cb169e060ef..32c367f97d6 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir @@ -22,13 +22,13 @@ func @reduce(%arg: memref<100x10x5xf32>, // CHECK-DAG: [[C10:%.*]] = constant 10 : index // CHECK-DAG: [[C100:%.*]] = constant 100 : index // CHECK: [[INIT:%.*]] = load [[INIT_BUF]] -// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]]) +// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C100]], [[C5]]) step ([[C1]], [[C1]]) { -// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) = +// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) = // CHECK-SAME: ([[C0]]) to ([[C10]]) step ([[C1]]) init ([[INIT]]) -> f32 { // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]] // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<100x10x5xf32> -// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 { +// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 { // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32): // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref // CHECK: [[ACC_BUF:%.*]] = alloc() : memref @@ -37,12 +37,12 @@ func @reduce(%arg: memref<100x10x5xf32>, // CHECK: store [[ELEM]], [[ELEM_BUF]][] : memref // CHECK: store [[ACC]], [[ACC_BUF]][] : memref // CHECK:
"xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]]) // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref -// CHECK: loop.reduce.return [[ACC_RESULT]] : f32 +// CHECK: scf.reduce.return [[ACC_RESULT]] : f32 // CHECK: } -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: } // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]] -// CHECK: loop.yield +// CHECK: scf.yield // ----- @@ -66,10 +66,10 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>, // CHECK-DAG: [[C1:%.*]] = constant 1 : index // CHECK-DAG: [[C100:%.*]] = constant 100 : index // CHECK: [[INIT:%.*]] = load [[INIT_BUF]] -// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[I:%.*]]) = ([[C0]]) +// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[I:%.*]]) = ([[C0]]) // CHECK-SAME: to ([[C100]]) step ([[C1]]) init ([[INIT]]) -> f32 { // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]{{\[}}[[I]]{{\]}} -// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 { +// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 { // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32): // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref // CHECK: [[ACC_BUF:%.*]] = alloc() : memref @@ -78,9 +78,9 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>, // CHECK: store [[ACC]], [[ACC_BUF]][] : memref // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]]) // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref -// CHECK: loop.reduce.return [[ACC_RESULT]] +// CHECK: scf.reduce.return [[ACC_RESULT]] // CHECK: } -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[C0]]] // ----- @@ -107,13 +107,13 @@ func @dynamic_reduce(%arg: memref, // CHECK: [[DIM1:%.*]] = dim [[ARG_BUF]], 1 : memref // CHECK: [[DIM2:%.*]] = dim [[ARG_BUF]], 2 : memref // CHECK: [[INIT:%.*]] = load [[INIT_BUF]] -// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]]) +// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[DIM0]], [[DIM2]]) step ([[C1]], [[C1]]) { -// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) = +// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) = // CHECK-SAME: ([[C0]]) to ([[DIM1]]) step ([[C1]]) init ([[INIT]]) -> f32 { // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]] // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref -// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 { +// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 { // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32): // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref // CHECK: [[ACC_BUF:%.*]] = alloc() : memref @@ -122,12 +122,12 @@ func @dynamic_reduce(%arg: memref, // CHECK: store [[ACC]], [[ACC_BUF]][] : memref // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]]) // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref -// CHECK: loop.reduce.return [[ACC_RESULT]] : f32 +// CHECK: scf.reduce.return [[ACC_RESULT]] : f32 // CHECK: } -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: } // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]] -// CHECK: loop.yield +// CHECK: scf.yield // ----- @@ -158,9 +158,9 @@ func @reduce_window(%arg: memref<112x112xf32>, // CHECK-DAG: [[C56:%.*]] = constant 56 : index // CHECK-DAG: [[C112:%.*]] = constant 112 : index // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref -// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) +// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) { -// CHECK: 
[[REDUCTION_RESULT:%.*]] = loop.parallel +// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel // CHECK-SAME: ([[IW:%.*]], [[JW:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C3]], [[C3]]) step ([[C1]], [[C1]]) // CHECK-SAME: init ([[INIT]]) -> f32 { @@ -177,15 +177,15 @@ func @reduce_window(%arg: memref<112x112xf32>, // CHECK: [[INDEX_J_FITS:%.*]] = cmpi "ult", [[INDEX_J]], [[C112]] // CHECK: [[IN_BOUNDS_1:%.*]] = and [[IN_BOUNDS_0]], [[INDEX_J_FITS]] -// CHECK: [[ELEM_TO_REDUCE:%.*]] = loop.if [[IN_BOUNDS_1]] -> (f32) { +// CHECK: [[ELEM_TO_REDUCE:%.*]] = scf.if [[IN_BOUNDS_1]] -> (f32) { // CHECK: [[OPERAND_ELEM:%.*]] = // CHECK-SAME: load [[OPERAND_BUF]]{{\[}}[[INDEX_I]], [[INDEX_J]]] -// CHECK: loop.yield [[OPERAND_ELEM]] : f32 +// CHECK: scf.yield [[OPERAND_ELEM]] : f32 // CHECK: } else { -// CHECK: loop.yield [[INIT]] : f32 +// CHECK: scf.yield [[INIT]] : f32 // CHECK: } -// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 { +// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 { // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32): // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref // CHECK: [[ACC_BUF:%.*]] = alloc() : memref @@ -194,12 +194,12 @@ func @reduce_window(%arg: memref<112x112xf32>, // CHECK: store [[ACC]], [[ACC_BUF]][] : memref // CHECK: "xla_lhlo.maximum"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]]) // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref -// CHECK: loop.reduce.return [[ACC_RESULT]] : f32 +// CHECK: scf.reduce.return [[ACC_RESULT]] : f32 // CHECK: } -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: } // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]] -// CHECK: loop.yield +// CHECK: scf.yield // CHECK: } // CHECK: return // CHECK: } diff --git a/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir b/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir index 4050340ce49..2340650dda8 100644 --- a/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir +++ b/tensorflow/compiler/mlir/xla/tests/materialize-broadcasts.mlir @@ -20,6 +20,17 @@ func @addBroadcastLhs(%arg0: tensor<4xf32>, %arg1: tensor<1x4xf32>) -> tensor<1x // ----- +// CHECK-LABEL: @addBroadcastEqual +func @addBroadcastEqual(%arg0: tensor<4x1xf32>, %arg1: tensor<1x4xf32>) -> tensor<4x4xf32> { + // CHECK-NEXT: %[[BROADCAST0:.*]] = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x1xf32>) -> tensor<4x4xf32> + // CHECK-NEXT: %[[BROADCAST1:.*]] = "xla_hlo.broadcast_in_dim"(%arg1) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x4xf32>) -> tensor<4x4xf32> + // CHECK-NEXT: %[[RESULT:.*]] = xla_hlo.add %[[BROADCAST0]], %[[BROADCAST1]] : tensor<4x4xf32> + %0 = "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<4x1xf32>, tensor<1x4xf32>) -> tensor<4x4xf32> + return %0 : tensor<4x4xf32> +} + +// ----- + // CHECK-LABEL: @addBroadcastMultidimension func @addBroadcastMultidimension(%arg0: tensor<1x1xf32>, %arg1: tensor<1x1x4xf32>) -> tensor<1x1x4xf32> { // CHECK-NEXT: %[[BROADCAST0:.*]] = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<1x1xf32>) -> tensor<1x1x4xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir index 3650307ea94..15fa91588a5 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir @@ -294,6 +294,12 @@ func @main() { // CHECK: f16[4] 
constant({1, -4, -65504, 0.015625} %cst_8 = constant dense<[1.0e+00, -4.0e+00, -65504.0e+00, 1.5625e-02]> : tensor<4xf16> + // CHECK: c64[] constant((1, 0)) + %cst_9 = constant dense<(1.000000e+00,0.000000e+00)> : tensor> + + // CHECK: c128[] constant((1, 0)) + %cst_10 = constant dense<(1.000000e+00,0.000000e+00)> : tensor> + return } @@ -1038,3 +1044,16 @@ func @main(%arg0: tensor<4xui8>) -> (tensor<4xui8>) { // CHECK: ENTRY // CHECK: %[[ARG0:.*]] = u8[4] parameter(0) // ROOT %[[RESULT:.*]] = u8[4] not(u8[4] %[[ARG0]]) + +// ----- + +// CHECK: HloModule +func @main(%arg0: tensor<4xi32>) -> (tensor<*xi32>) { + %0 = "xla_hlo.not"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %1 = tensor_cast %0 : tensor<4xi32> to tensor<*xi32> + return %1 : tensor<*xi32> +} + +// CHECK: ENTRY +// CHECK: %[[ARG0:.*]] = s32[4] parameter(0) +// ROOT %[[RESULT:.*]] = s32[4] not(s32[4] %[[ARG0]]) diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export_errors.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export_errors.mlir new file mode 100644 index 00000000000..97c53cb5f9f --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/translate/export_errors.mlir @@ -0,0 +1,7 @@ +// RUN: not tf-mlir-translate -split-input-file -mlir-hlo-to-hlo-text %s 2>&1 | FileCheck %s + +// CHECK: Opaque elements attr not supported +func @main() { + %0 = "tf.Const"() {value = opaque<"tf", "0x0123456789ABCDEF"> : tensor<4xf32>} : () -> tensor<4xf32> + return +} diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index d1133057544..207a8f2eabc 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -212,10 +212,14 @@ add { // CHECK: dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xbf16> %constant.3 = bf16[4] constant({1, 2, 3, 4}) + // CHECK: dense<(1.000000e+00,0.000000e+00)> : tensor> + %constant.4 = c64[] constant((1, 0)) + + // CHECK: dense<(1.000000e+00,0.000000e+00)> : tensor> + %constant.5 = c128[] constant((1, 0)) + // CHECK: dense<[1.000000e+00, -4.000000e+00, -6.550400e+04, 1.562500e-02]> : tensor<4xf16> - ROOT %constant.4 = f16[4] constant({1, -4, -65504, 0.015625}) - - + ROOT %constant.6 = f16[4] constant({1, -4, -65504, 0.015625}) } // TODO(b/129422361) Potentially update when copy, reshape, and conv have actual @@ -244,8 +248,8 @@ add { // CHECK-SAME: kernel_input_feature_dimension = 2 : i64 // CHECK-SAME: kernel_output_feature_dimension = 3 : i64 // CHECK-SAME: kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64> - // CHECK-SAME: output_batch_dimension = 0 : i64 - // CHECK-SAME: output_feature_dimension = 3 : i64 + // CHECK-SAME: output_batch_dimension = 3 : i64 + // CHECK-SAME: output_feature_dimension = 0 : i64 // CHECK-SAME: output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64> // CHECK-SAME: } // CHECK-SAME: feature_group_count = 1 : i64 @@ -255,11 +259,11 @@ add { // CHECK-SAME: rhs_dilations = dense<[2, 3]> : tensor<2xi64> // CHECK-SAME: window_strides = dense<[4, 5]> : tensor<2xi64> // CHECK-SAME: } - // CHECK-SAME: (tensor<256x32x32x6xf32>, tensor<2x2x1x1xf32>) -> tensor<256x30x30x16xf32> + // CHECK-SAME: (tensor<256x32x32x6xf32>, tensor<2x2x1x1xf32>) -> tensor<16x30x30x256xf32> - %convolution.4 = f32[256,30,30,16]{2,1,3,0} convolution(%reshape.2, %constant.3), window={size=3x3 stride=4x5 pad=44_45x60_60 rhs_dilate=2x3}, dim_labels=b01f_01io->b01f, metadata={op_type="Conv2D" 
op_name="embedded_inference/conv_model/conv_0/Conv2D"} + %convolution.4 = f32[16,30,30,256]{2,1,3,0} convolution(%reshape.2, %constant.3), window={size=3x3 stride=4x5 pad=44_45x60_60 rhs_dilate=2x3}, dim_labels=b01f_01io->f01b, metadata={op_type="Conv2D" op_name="embedded_inference/conv_model/conv_0/Conv2D"} - // CHECK-NEXT: %3 = "xla_hlo.reshape"(%2) {name = "{{.*}}"} : (tensor<256x30x30x16xf32>) -> tensor<256x30x30x16xf32> + // CHECK-NEXT: %3 = "xla_hlo.reshape"(%2) {name = "{{.*}}"} : (tensor<16x30x30x256xf32>) -> tensor<256x30x30x16xf32> %reshape.5 = f32[256,30,30,16]{3,2,1,0} reshape(%convolution.4), metadata={op_name="HLO_Retvals"} // CHECK-NEXT: "xla_hlo.tuple"(%3) {name = "{{.*}}"} : (tensor<256x30x30x16xf32>) -> tuple> diff --git a/tensorflow/compiler/mlir/xla/transforms/buffer_assignment.h b/tensorflow/compiler/mlir/xla/transforms/buffer_assignment.h index d8b4c2554bb..ced5769b44c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/buffer_assignment.h +++ b/tensorflow/compiler/mlir/xla/transforms/buffer_assignment.h @@ -16,9 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_BUFFER_ASSIGNMENT_H_ #define TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_BUFFER_ASSIGNMENT_H_ -#include "mlir/Analysis/Dominance.h" #include "mlir/Analysis/Liveness.h" -#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Builders.h" // TF:llvm-project +#include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" // TF:llvm-project #include "mlir/Support/LLVM.h" #include "mlir/Transforms/DialectConversion.h" // TF:llvm-project diff --git a/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc index a20511a95fc..0c9585a817f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/chlo_legalize_to_hlo.cc @@ -33,24 +33,23 @@ namespace { // Converts binary ops that statically are determined to not broadcast directly // to the corresponding xla_hlo non-broadcasting op. template -struct ConvertTrivialNonBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { +struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only rewrite for statically determinable non-broadcasting cases. - auto lhs = operands[0].getType().dyn_cast(); - auto rhs = operands[1].getType().dyn_cast(); - if (!lhs || !rhs) return failure(); + auto lhs_type = op.lhs().getType().template dyn_cast(); + auto rhs_type = op.rhs().getType().template dyn_cast(); + if (!lhs_type || !rhs_type) return failure(); // Requires rank broadcast. - if (lhs.getRank() != rhs.getRank()) return failure(); + if (lhs_type.getRank() != rhs_type.getRank()) return failure(); // Any dynamic dimension may require broadcasting and requires more // analysis. 
- if (!lhs.hasStaticShape() || !rhs.hasStaticShape()) return failure(); + if (!lhs_type.hasStaticShape() || !rhs_type.hasStaticShape()) + return failure(); - for (auto extents : llvm::zip(lhs.getShape(), rhs.getShape())) { + for (auto extents : llvm::zip(lhs_type.getShape(), rhs_type.getShape())) { auto lhs_extent = std::get<0>(extents); auto rhs_extent = std::get<1>(extents); if (lhs_extent != rhs_extent) { @@ -58,9 +57,8 @@ struct ConvertTrivialNonBroadcastBinaryOp } } - rewriter.replaceOp( - op, {Adaptor::CreateOp(op, op.getResult().getType(), operands[0], - operands[1], rewriter)}); + rewriter.replaceOp(op, {Adaptor::CreateOp(op, op.getResult().getType(), + op.lhs(), op.rhs(), rewriter)}); return success(); } }; @@ -83,14 +81,13 @@ struct ConvertTrivialNonBroadcastBinaryOp // Whether that is of any practical benefit remains to be seen. template struct ConvertRankedDynamicBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only support ranked operands. - Value lhs = operands[0]; - Value rhs = operands[1]; + Value lhs = op.lhs(); + Value rhs = op.rhs(); auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); auto result_type = diff --git a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc index aa29241048b..10f35768bbd 100644 --- a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc @@ -27,6 +27,7 @@ limitations under the License. #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" @@ -39,16 +40,11 @@ namespace xla_hlo { namespace { constexpr StringRef kTempBufferAttr = "temp"; - -/// Returns DeallocOp to ensure that CopyOp is not inserted after dealloc. 
-Operation* FindInsertionPointForCopy(Value value) { - for (const auto& user : value.getUsers()) { - if (auto dealloc = dyn_cast(user)) { - return user; - } - } - return nullptr; -} +template +using BaseOpConversion = BufferAssignmentOpConversionPattern; +using StdReturnOpConverter = + NonVoidToVoidReturnOpConverter; Value InsertDynamicAllocAndDealloc(Location loc, Value result, Value shape_operand, @@ -92,8 +88,9 @@ Value InsertDynamicAllocAndDealloc(Location loc, Value result, return alloc; } -Value InsertAllocAndDealloc(Location loc, Value result, - ConversionPatternRewriter* rewriter) { +Value InsertAlloc(Location loc, OpResult result, + BufferAssignmentPlacer* bufferAssignment, + ConversionPatternRewriter* rewriter) { auto result_type = result.getType().dyn_cast(); if (!result_type || !result_type.hasStaticShape()) { result.getDefiningOp()->emitOpError() @@ -101,31 +98,21 @@ Value InsertAllocAndDealloc(Location loc, Value result, } auto memref_type = MemRefType::get(result_type.getShape(), result_type.getElementType()); - - Operation* op = result.getDefiningOp(); - auto block = op->getBlock(); - - OpBuilder allocBuilder(op); - allocBuilder.setInsertionPointToStart(block); // Inserting at the beginning - auto alloc = allocBuilder.create(loc, memref_type); - - alloc.setAttr(kTempBufferAttr, rewriter->getBoolAttr(true)); - - allocBuilder.setInsertionPoint(block, std::prev(block->end())); - allocBuilder.create(loc, alloc); - + OpBuilder::InsertionGuard guard(*rewriter); + rewriter->restoreInsertionPoint( + bufferAssignment->computeAllocPosition(result)); + auto alloc = rewriter->create(loc, memref_type); return alloc; } template -class HloToLhloOpConverter : public ConversionPattern { +class HloToLhloOpConverter : public BaseOpConversion { public: - explicit HloToLhloOpConverter(MLIRContext* context) - : ConversionPattern(HloOpTy::getOperationName(), 1, context) {} - + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, ArrayRef operands, + HloOpTy hloOp, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { + Operation* op = hloOp.getOperation(); const auto& original_results = op->getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : llvm::enumerate(original_results)) { @@ -135,8 +122,8 @@ class HloToLhloOpConverter : public ConversionPattern { return failure(); } if (resultType.hasStaticShape()) { - buffer_args.push_back( - InsertAllocAndDealloc(op->getLoc(), result.value(), &rewriter)); + buffer_args.push_back(InsertAlloc(op->getLoc(), result.value(), + this->bufferAssignment, &rewriter)); } else { SmallVector results_shape; auto shape_type_op = dyn_cast(op); @@ -156,9 +143,9 @@ class HloToLhloOpConverter : public ConversionPattern { }; struct HloToLhloDynamicBroadcastInDimOpConverter - : public OpConversionPattern { + : public BaseOpConversion { public: - using OpConversionPattern::OpConversionPattern; + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( xla_hlo::DynamicBroadcastInDimOp op, ArrayRef operands, @@ -175,10 +162,9 @@ struct HloToLhloDynamicBroadcastInDimOpConverter } }; -struct HloToLhloReduceOpConverter - : public OpConversionPattern { +struct HloToLhloReduceOpConverter : public BaseOpConversion { public: - using OpConversionPattern::OpConversionPattern; + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( xla_hlo::ReduceOp op, ArrayRef operands, @@ -194,7 +180,8 @@ struct HloToLhloReduceOpConverter const auto& original_results = 
op.getResults(); SmallVector buffer_args(operands.begin(), operands.end()); for (auto result : original_results) { - buffer_args.push_back(InsertAllocAndDealloc(loc, result, &rewriter)); + buffer_args.push_back( + InsertAlloc(loc, result, this->bufferAssignment, &rewriter)); } auto new_op = rewriter.create( loc, llvm::None, buffer_args, op.getAttrs()); @@ -230,12 +217,12 @@ struct HloToLhloReduceOpConverter } }; -class HloToLhloTensorLoadOpConverter : public ConversionPattern { +class HloToLhloTensorLoadOpConverter + : public BaseOpConversion { public: - explicit HloToLhloTensorLoadOpConverter(MLIRContext* context) - : ConversionPattern(TensorLoadOp::getOperationName(), 1, context) {} + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, ArrayRef operands, + mlir::TensorLoadOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOp(op, operands); return success(); @@ -243,13 +230,13 @@ class HloToLhloTensorLoadOpConverter : public ConversionPattern { }; // TODO(b/137624192): Rewrite into a copy and elide copy if possible. -class HloToLhloTensorStoreOpConverter : public ConversionPattern { +class HloToLhloTensorStoreOpConverter + : public BaseOpConversion { public: - explicit HloToLhloTensorStoreOpConverter(MLIRContext* context) - : ConversionPattern(TensorStoreOp::getOperationName(), 1, context) {} + using BaseOpConversion::BaseOpConversion; LogicalResult matchAndRewrite( - Operation* op, ArrayRef operands, + mlir::TensorStoreOp op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { rewriter.replaceOpWithNewOp( op, llvm::None, operands.front(), operands.back()); @@ -291,7 +278,6 @@ class HloToLhloTensorStoreOpConverter : public ConversionPattern { // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () // "xla_lhlo.multiply"(%0, %arg0, %arg3) : // (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// dealloc %0 : memref<2x2xf32> // "xla_lhlo.terminator"() : () -> () // }) : () -> () // return @@ -313,14 +299,13 @@ class HloToLhloTensorStoreOpConverter : public ConversionPattern { // %arg1: memref<4xf32>, // %arg2: memref<4xf32>) { // %0 = alloc() : memref<4xf32> -// %1 = alloc() : memref<4xf32> + // "xla_lhlo.maximum"(%arg0, %arg1, %0) : // (memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () +// %1 = alloc() : memref<4xf32> // "xla_lhlo.add"(%arg0, %0, %1) : // (memref<4xf32>, memref<4xf32>, memref<4xf32>) -> () // "xla_lhlo.copy"(%1, %arg2) : (memref<4xf32>, memref<4xf32>) -> () -// dealloc %0 : memref<4xf32> -// dealloc %1 : memref<4xf32> // "xla_lhlo.terminator"() : () -> () // } @@ -346,101 +331,25 @@ struct HloLegalizeToLhlo }); auto module = getOperation(); - populateHLOToLHLOConversionPattern(module.getContext(), &patterns); - - // Do partial conversion so we can have unknown ops in tests. 
- if (failed(applyPartialConversion(module, target, patterns, nullptr))) { - signalPassFailure(); - } + BufferAssignmentTypeConverter converter; + module.walk([&](FuncOp func) { + BufferAssignmentPlacer bufferAssignment(func); + OwningRewritePatternList patterns; + populateHLOToLHLOConversionPattern(func.getContext(), &bufferAssignment, + &converter, &patterns); + return WalkResult( + applyPartialConversion(func, target, patterns, &converter)); + }); } }; - -Type ConvertType(Type t) { - if (auto tensorType = t.dyn_cast()) { - return MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - } - return t; -} - } // namespace -/// Transforms FuncOp arguments and results from tensors to buffers. Tensor -/// results are converted to memrefs and appended to the argument list. -class HloToLhloFuncOpConverter : public OpConversionPattern { - public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult matchAndRewrite( - FuncOp funcOp, ArrayRef operands, - ConversionPatternRewriter& rewriter) const final { - if (funcOp.getBody().getBlocks().size() > 1) { - funcOp.emitOpError() << "tensor to buffer conversion expects a single " - "block in the region containing the operation"; - return failure(); - } - - auto funcType = funcOp.getType(); - - TypeConverter::SignatureConversion conversion(funcType.getNumInputs()); - for (auto argType : llvm::enumerate(funcType.getInputs())) { - conversion.addInputs(argType.index(), ConvertType(argType.value())); - } - for (auto resType : funcType.getResults()) { - conversion.addInputs(ConvertType(resType)); - } - rewriter.updateRootInPlace(funcOp, [&] { - funcOp.setType( - rewriter.getFunctionType(conversion.getConvertedTypes(), llvm::None)); - rewriter.applySignatureConversion(&funcOp.getBody(), conversion); - }); - return success(); - } -}; - -/// Transforms ReturnOp to LhloTerminator. CopyOp is inserted to copy each -/// result to the corresponding buffer argument. 
-class StdToLhloReturnOpConverter : public OpConversionPattern { - public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult matchAndRewrite( - mlir::ReturnOp returnOp, ArrayRef operands, - ConversionPatternRewriter& rewriter) const final { - auto numReturnValues = returnOp.getNumOperands(); - auto funcOp = returnOp.getParentOfType(); - auto numFuncArgs = funcOp.getNumArguments(); - auto loc = returnOp.getLoc(); - - for (auto operand : llvm::enumerate(operands)) { - auto returnArgNumber = numFuncArgs - numReturnValues + operand.index(); - auto dstBuffer = funcOp.getArgument(returnArgNumber); - if (dstBuffer == operand.value()) { - continue; - } - - auto dealloc = FindInsertionPointForCopy(operand.value()); - - if (dealloc == nullptr) { - returnOp.emitOpError() - << "Missing dealloc for operand " << operand.index(); - return failure(); - } - OpBuilder::InsertionGuard guard(rewriter); - rewriter.setInsertionPoint(dealloc); - rewriter.create(loc, llvm::None, operand.value(), - funcOp.getArgument(returnArgNumber)); - } - rewriter.replaceOpWithNewOp(returnOp); - return success(); - } -}; - -void populateHLOToLHLOConversionPattern(MLIRContext* context, - OwningRewritePatternList* patterns) { +void populateHLOToLHLOConversionPattern( + MLIRContext* context, BufferAssignmentPlacer* bufferAssignment, + TypeConverter* converter, OwningRewritePatternList* patterns) { // clang-format off patterns->insert< HloToLhloDynamicBroadcastInDimOpConverter, - HloToLhloFuncOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, @@ -472,8 +381,9 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, HloToLhloReduceOpConverter, HloToLhloTensorLoadOpConverter, HloToLhloTensorStoreOpConverter, - StdToLhloReturnOpConverter - >(context); + FunctionAndBlockSignatureConverter, + StdReturnOpConverter + >(context, bufferAssignment, converter); // clang-format on } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index a6a6829b109..10bac232b0f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -25,6 +25,7 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/Dialect/Traits.h" // from @llvm-project @@ -43,9 +44,11 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h" #include "tensorflow/compiler/mlir/xla/convert_op_folder.h" +#include "tensorflow/compiler/mlir/xla/ir/chlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" #include "tensorflow/compiler/mlir/xla/ir/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" +#include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" #include "tensorflow/compiler/xla/client/padding.h" #include "tensorflow/compiler/xla/client/sharding_builder.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -2589,6 +2592,21 @@ class ConvertRangeOp : public OpRewritePattern { } }; +ElementsAttr ConvertAxisAttr(Value val, ElementsAttr attr, Builder *builder) { + auto int_attr = attr.cast(); + auto type = val.getType().cast(); + + SmallVector axis; + axis.reserve(int_attr.getNumElements()); + + int64_t rank = type.getRank(); + for (auto val : int_attr.getValues()) { + axis.push_back((val.getSExtValue() + rank) % rank); + } + + return builder->getI64TensorAttr(axis); +} + /// Converts the LinSpace tensorflow op to a xla_hlo.iota op with a scaling /// and offset applied to generate the linspace values. The output tensor needs /// to have a static shape. The implementation is defined in C++ because there @@ -4181,6 +4199,68 @@ class ConvertXlaShardingOp : public OpRewritePattern { } }; +// Converts a TF InplaceUpdate op to DynamicUpdateSlice HLO. +class ConvertInplaceUpdateOp : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::InplaceUpdateOp op, + PatternRewriter &rewriter) const override { + auto input = op.x(); + auto indices = op.i(); + auto updates = op.v(); + + // Slice each row of `i` and `v` to perform a separate dynamic-update-slice + // on the contents of `x`. 
+ auto input_type = input.getType().cast(); + auto updates_type = updates.getType().cast(); + auto indices_type = indices.getType().cast(); + if (!indices_type.hasStaticShape()) return failure(); + + if (indices_type.getRank() != 1) return failure(); + + SmallVector unpacked_indices_type( + indices_type.getDimSize(0), + RankedTensorType::get({}, indices_type.getElementType())); + auto zero_attr = IntegerAttr::get(rewriter.getIntegerType(64), 0); + auto unpacked_indices = rewriter.create( + op.getLoc(), unpacked_indices_type, indices, zero_attr); + + SmallVector split_updates_shape; + split_updates_shape.append(updates_type.getShape().begin(), + updates_type.getShape().end()); + split_updates_shape.front() = 1; + SmallVector split_updates_type; + split_updates_type.resize( + updates_type.getShape().front(), + RankedTensorType::get(split_updates_shape, + updates_type.getElementType())); + + auto cst = + rewriter.create(op.getLoc(), zero_attr).getResult(); + auto split_updates = rewriter.create( + op.getLoc(), split_updates_type, cst, updates); + + SmallVector input_indices; + input_indices.resize(input_type.getRank(), cst); + + SmallVector starts(updates_type.getRank(), 0); + SmallVector strides(updates_type.getRank(), 1); + SmallVector limits(updates_type.getShape().begin(), + updates_type.getShape().end()); + + for (auto pair : + llvm::zip(unpacked_indices.output(), split_updates.output())) { + input_indices.front() = std::get<0>(pair); + input = rewriter.create( + op.getLoc(), op.getType(), input, std::get<1>(pair), input_indices); + } + + rewriter.replaceOp(op, input); + return success(); + } +}; + // Converts a TF XlaDynamicUpdateSlice op to DynamicUpdateSlice HLO. class ConvertXlaDynamicUpdateSliceOp : public OpRewritePattern { @@ -4785,6 +4865,62 @@ class ConvertQrOp : public OpRewritePattern { } }; +// Emits debug information which includes the number of ops of each type which +// failed to legalize. +void EmitLegalizationErrors(Operation *op, + const DenseSet &nonlegalized_ops) { + // Track the legalization failures by mapping op name to information about + // that failure: the number of unlegalized occurances of the op, and one + // example operation that failed. + std::map> op_name_to_error_info; + DenseSet error_ops; + for (Operation *nonlegalized_op : nonlegalized_ops) { + // Increment count of this legalization failure. + StringRef op_name = nonlegalized_op->getName().getStringRef(); + // If this emplace is successful, it's the first time we've encountered + // this op type. Initialize count to 0 so that after increment, it is 1. + auto insertion_result = op_name_to_error_info.emplace( + op_name, std::make_pair(0, nonlegalized_op)); + ++insertion_result.first->second.first; + } + std::vector error_messages; + error_messages.reserve(op_name_to_error_info.size()); + for (const auto &op_info : op_name_to_error_info) { + error_messages.push_back( + llvm::formatv("{0} (count: {1})", op_info.first, op_info.second.first)); + } + Location loc = op->getLoc(); + emitError(loc) << "The following operations cannot be legalized: " + << llvm::join(error_messages, "; ") + << ". These legalization failure(s) may be due to missing TF " + "to HLO lowerings and/or unsupported attributes, etc."; + // Emit more information about the missing ops. This error message + // contains useful details beyond the op name (input and output shapes, + // attributes, etc.). 
+ if (!VLOG_IS_ON(1) && nonlegalized_ops.size() != 1) { + emitError(loc) + << "Emitting more detail about one op that failed to legalize..."; + } else if (VLOG_IS_ON(1)) { + emitError(loc) << "Emitting more detail about one of each type of op " + "that failed to legalize..."; + } + for (const auto &op_info : op_name_to_error_info) { + op_info.second.second->emitOpError() << "is not legalizable"; + if (!VLOG_IS_ON(1)) break; + } +} + +// Performs the lowering to XLA dialect. +void LegalizeTF::runOnFunction() { + if (failed(legalizeTF(getFunction(), allow_partial_conversion_))) + signalPassFailure(); +} + +static PassRegistration pass( + "xla-legalize-tf", "Legalize from TensorFlow to the XLA dialect"); + +} // end namespace + #include "tensorflow/compiler/mlir/xla/transforms/generated_legalize_tf.inc" LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { @@ -4806,12 +4942,13 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { ConvertConv3DBackpropInputOp, ConvertCumsumOp, ConvertDiagPartOp, ConvertEinsumOp, ConvertFusedBatchNormGradOp, ConvertFusedBatchNormGradV2Op, ConvertFusedBatchNormGradV3Op, - ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, ConvertLinSpaceOp, - ConvertMaxOp, ConvertMinOp, ConvertAvgPoolOp, ConvertMaxPool2DOp, - ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, - ConvertMeanOp, ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, - ConvertProdOp, ConvertQrOp, ConvertRangeOp, ConvertSelectV2Op, - ConvertSigmoidOp, ConvertSizeOp, ConvertSoftmaxOp, + ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, + ConvertInplaceUpdateOp, ConvertLinSpaceOp, ConvertMaxOp, ConvertMinOp, + ConvertAvgPoolOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, + ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, + ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, + ConvertRangeOp, ConvertSelectV2Op, ConvertSigmoidOp, ConvertSizeOp, + ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, ConvertTensorScatterUpdateOp, ConvertTileOp, ConvertTopKV2Op, @@ -4820,7 +4957,12 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { ConvertRandomShuffleOp, ConvertVariableShapeOp, ConvertXlaShardingOp, ConvertXlaDynamicUpdateSliceOp>(op->getContext()); + // Populate with CHLO->HLO lowerings to account for TF ops legalized to + // CHLO first. + xla_chlo::PopulateLegalizeChloToHloPatterns(context, &patterns); + ConversionTarget target(*context); + target.addIllegalDialect(); target.addLegalDialect(); target.addLegalDialect(); target.addLegalDialect(); @@ -4830,23 +4972,21 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { if (!allow_partial_conversion) { // Fully qualify ReturnOp here as xla_hlo dialect also defines a ReturnOp. target.addLegalOp(); - return applyFullConversion(op, target, patterns); + DenseSet nonlegalized_ops; + LogicalResult result = applyPartialConversion( + op, target, patterns, /*converter=*/nullptr, &nonlegalized_ops); + // In order to enforce that the conversion result is fully converted, + // fail if there are any nonlegalized ops in the set. + if (failed(result) || !nonlegalized_ops.empty()) { + EmitLegalizationErrors(op, nonlegalized_ops); + return failure(); + } + return result; } return applyPartialConversion(op, target, patterns); } -/// Performs the lowering to XLA dialect. 
-void LegalizeTF::runOnFunction() { - if (failed(legalizeTF(getFunction(), allow_partial_conversion_))) - signalPassFailure(); -} - -static PassRegistration pass( - "xla-legalize-tf", "Legalize from TensorFlow to the XLA dialect"); - -} // end namespace - std::unique_ptr> createLegalizeTFPass( bool allow_partial_conversion) { return std::make_unique(allow_partial_conversion); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index b2a7c1e7f62..959902692dc 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -18,6 +18,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Dialect/StandardOps/IR/Ops.td" include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" +include "tensorflow/compiler/mlir/xla/ir/chlo_ops.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops.td" def SignedIntTensor : TensorOf<[I1, I8, I16, I32, I64]>; @@ -80,6 +81,9 @@ def BiasAddFeatureDimension : NativeCodeCall< // $input needs to be a ranked tensor to identify index of the feature // dimension depending on the data_format 'NHWC' or 'NCHW'. +// TODO(laurenzo): This should be converted to do explicit broadcasting since +// it can generate broadcast dimensions that are not compatible with the simple +// xla_chlo.add broadcast_dims. def : Pat<(TF_BiasAddOp AnyRankedTensor:$input, $bias, $data_format), (HLO_AddOp $input, $bias, (BiasAddFeatureDimension $data_format, $input))>; @@ -96,16 +100,16 @@ class DirectBinaryPat : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r), (ToOp $l, $r, (BinBroadcastDimensions $l, $r))>; -foreach fromToBinPair = [[TF_AddOp, HLO_AddOp], - [TF_AddV2Op, HLO_AddOp], - [TF_DivOp, HLO_DivOp], - [TF_LeftShiftOp, HLO_ShiftLeftOp], - [TF_MaximumOp, HLO_MaxOp], - [TF_MinimumOp, HLO_MinOp], - [TF_MulOp, HLO_MulOp], - [TF_PowOp, HLO_PowOp], - [TF_RealDivOp, HLO_DivOp], - [TF_SubOp, HLO_SubOp]] in +foreach fromToBinPair = [[TF_AddOp, HLOClient_BroadcastAddOp], + [TF_AddV2Op, HLOClient_BroadcastAddOp], + [TF_DivOp, HLOClient_BroadcastDivOp], + [TF_LeftShiftOp, HLOClient_BroadcastShiftLeftOp], + [TF_MaximumOp, HLOClient_BroadcastMaxOp], + [TF_MinimumOp, HLOClient_BroadcastMinOp], + [TF_MulOp, HLOClient_BroadcastMulOp], + [TF_PowOp, HLOClient_BroadcastPowOp], + [TF_RealDivOp, HLOClient_BroadcastDivOp], + [TF_SubOp, HLOClient_BroadcastSubOp]] in def : DirectBinaryPat; def LowerRightShiftSigned : @@ -196,10 +200,10 @@ class DirectLogicalBinaryPat (ToOp $l, $r, (BinBroadcastDimensions $l, $r)), [(SignedIntTensor $l)]>; -foreach fromToBinPair = [[TF_LogicalAndOp, HLO_AndOp], - [TF_LogicalOrOp, HLO_OrOp], - [TF_BitwiseOrOp, HLO_OrOp], - [TF_BitwiseAndOp, HLO_AndOp]] in +foreach fromToBinPair = [[TF_LogicalAndOp, HLOClient_BroadcastAndOp], + [TF_LogicalOrOp, HLOClient_BroadcastOrOp], + [TF_BitwiseOrOp, HLOClient_BroadcastOrOp], + [TF_BitwiseAndOp, HLOClient_BroadcastAndOp]] in def : DirectLogicalBinaryPat; //===----------------------------------------------------------------------===// @@ -208,7 +212,8 @@ foreach fromToBinPair = [[TF_LogicalAndOp, HLO_AndOp], class DirectComparePat : Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r), - (HLO_CompareOp $l, $r, (BinBroadcastDimensions $l, $r), direction)>; + (HLOClient_BroadcastCompareOp + $l, $r, (BinBroadcastDimensions $l, $r), direction)>; def : DirectComparePat; def : DirectComparePat; @@ -218,7 +223,8 @@ def : DirectComparePat; class EqualityPat : 
Pat<(FromOp AnyRankedTensor:$l, AnyRankedTensor:$r, TrueBoolAttr:$incompatible_shape_error), - (HLO_CompareOp $l, $r, (BinBroadcastDimensions $l, $r), direction), + (HLOClient_BroadcastCompareOp + $l, $r, (BinBroadcastDimensions $l, $r), direction), [(AreBroadcastCompatible $l, $r)]>; def : EqualityPat; @@ -273,6 +279,13 @@ def : Pat<(TF_CrossReplicaSumOp $input, (TF_ConstOp $group_assignment)), (HLO_CrossReplicaSumOp $input, (CastElementsToI64Elements $group_assignment))>; +//===----------------------------------------------------------------------===// +// All2All op patterns. +//===----------------------------------------------------------------------===// + +def : Pat<(TF_AllToAllOp AnyRankedTensor:$input, (TF_ConstOp $group_assignment), I64Attr:$concat_dimension, $split_dimension, $split_count), + (HLO_AllToAllOp $input, $split_dimension, $concat_dimension, $split_count, (CastElementsToI64Elements $group_assignment))>; + //===----------------------------------------------------------------------===// // FFT op patterns. //===----------------------------------------------------------------------===// @@ -513,6 +526,16 @@ foreach callOp = [TF_PartitionedCallOp, TF_StatefulPartitionedCallOp] in { [(ArgTypesMatchCallee $op, $args, $f)]>; } +//===----------------------------------------------------------------------===// +// Reverse op patterns. +//===----------------------------------------------------------------------===// + +// Handles axis conversion for TF reverse. +def ConvertAxisAttr : NativeCodeCall<"ConvertAxisAttr($0, $1, &$_builder)">; + +def : Pat<(TF_ReverseV2Op AnyRankedTensor:$values, (TF_ConstOp $axis)), + (HLO_ReverseOp $values, (ConvertAxisAttr $values, $axis))>; + //===----------------------------------------------------------------------===// // Ternary op patterns. //===----------------------------------------------------------------------===// @@ -543,7 +566,6 @@ foreach Mapping = [ [TF_LogicalNotOp, HLO_NotOp], [TF_NegOp, HLO_NegOp], [TF_RealOp, HLO_RealOp], - [TF_RoundOp, HLO_RoundOp], [TF_RsqrtOp, HLO_RsqrtOp], [TF_SinOp, HLO_SinOp], [TF_SqrtOp, HLO_SqrtOp], diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 25bdd0f5f62..76657bd5e20 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -23,6 +23,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project #include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Location.h" // from @llvm-project @@ -37,6 +38,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h.inc" #include "tensorflow/compiler/mlir/tensorflow/translate/export_tf_dialect_op.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/translate_utils.h" #include "tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h" @@ -81,28 +83,51 @@ static bool IsOpWhitelisted(Operation* op) { // clang-format off static llvm::SmallDenseSet ops = { TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), - TypeID::get(), - TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -111,18 +136,43 @@ static bool IsOpWhitelisted(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), - TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), - TypeID::get() + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get(), + TypeID::get() }; // clang-format on @@ -170,6 +220,10 @@ class FuncLegalizer { // legalization. LogicalResult LegalizeOp(Operation* op); + // Converts the given operand to expression of kind kConstant or kXlaOp. + // Emits a remark and returns expression of kind kInvalid on failure. + tensorflow::XlaExpression GetExprForOperand(Value operand, Operation* op); + FuncOp func_; std::string device_type_; @@ -296,6 +350,17 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { // Transfer ownership of the kernel to a local smart pointer. auto op_kernel = absl::WrapUnique(op_kernel_raw); + std::vector required_constants; + status = tensorflow::XlaOpRegistry::CompileTimeConstantInputs( + *op_kernel, &required_constants); + if (!status.ok()) { + op->emitRemark() << "failed to compute required constants: " + << status.ToString(); + return success(); + } + llvm::SmallDenseSet required_consts; + required_consts.insert(required_constants.begin(), required_constants.end()); + // TensorValue in inputs are backed by tensors which in turn depend on // expressions. So, pre-allocate them to the required size. InlinedVector expressions; @@ -306,45 +371,39 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { inputs.reserve(op->getNumOperands()); // Prepare the list of Tensor inputs for the kernel. 
- for (Value operand : op->getOperands()) { - // Skip this op if XLA doesn't support this operand type. - auto xla_op_or = hlo_builder_.MakeXlaOp(operand); - if (!xla_op_or.ok()) { - op->emitRemark() << "skipping legalization due to " - << xla_op_or.status().ToString(); + for (auto it : llvm::enumerate(op->getOperands())) { + Value operand = it.value(); + size_t idx = it.index(); + + tensorflow::XlaExpression expr = GetExprForOperand(operand, op); + tensorflow::XlaExpression::Kind kind = expr.kind(); + if (kind == tensorflow::XlaExpression::Kind::kInvalid) return success(); + if (required_consts.count(idx) && + kind != tensorflow::XlaExpression::Kind::kConstant) { + op->emitRemark() << "lowering requires operand #" << idx + << " to be a constant"; return success(); } - ::xla::XlaOp xla_op = xla_op_or.ValueOrDie(); + expressions.push_back(expr); - tensorflow::DataType dtype; - status = tensorflow::ConvertToDataType(operand.getType(), &dtype); - if (!status.ok()) { - op->emitRemark() << "skipping legalization due to " << status.ToString(); - return success(); - } - - auto expression = tensorflow::XlaExpression::XlaOp(xla_op, dtype); - expressions.push_back(expression); - - if (!tensorflow::DataTypeCanUseMemcpy(dtype)) { + if (!tensorflow::DataTypeCanUseMemcpy(expr.dtype())) { op->emitRemark() << "skipping legalization due to unsupported type " << operand.getType(); return success(); } - auto shape_or = expression.GetShape(); + auto shape_or = expr.GetShape(); if (!shape_or.ok()) { op->emitRemark() << "failed to get shape for expression. " - << expression.HumanString(); + << expr.HumanString(); return success(); } tensors.emplace_back( - device_->GetAllocator(tensorflow::AllocatorAttributes()), dtype, + device_->GetAllocator(tensorflow::AllocatorAttributes()), expr.dtype(), shape_or.ValueOrDie()); tensorflow::Tensor& tensor = tensors.back(); - tensorflow::XlaOpKernelContext::AssignExpressionToTensor(expression, - &tensor); + tensorflow::XlaOpKernelContext::AssignExpressionToTensor(expr, &tensor); inputs.emplace_back(&tensor); } @@ -376,13 +435,51 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { return op->emitError( "expects XlaExpression of kind kXlaOp in compiled output"); auto value = hlo_builder_.GetValue(expr->handle()); - op->getResult(i).replaceAllUsesWith(value); + mlir::OpResult old_result = op->getResult(i); + if (value.getType() != old_result.getType()) { + value = + hlo_builder_.create(value, old_result.getType()); + } + old_result.replaceAllUsesWith(value); } op->erase(); return success(); } +tensorflow::XlaExpression FuncLegalizer::GetExprForOperand(Value operand, + Operation* op) { + ElementsAttr const_attr; + auto defining_op = operand.getDefiningOp(); + if (defining_op && matchPattern(defining_op, m_Constant(&const_attr))) { + tensorflow::Tensor tensor; + auto status = tensorflow::ConvertToTensor(const_attr, &tensor); + if (!status.ok()) { + op->emitRemark() << "skipping legalization due to failed const conversion" + << status.ToString(); + return tensorflow::XlaExpression::Invalid(); + } + return tensorflow::XlaExpression::Constant(tensor); + } + + // Skip this op if XLA doesn't support this operand type. 
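The constant branch of GetExprForOperand above can be read in isolation as the following hedged sketch (not the exact pass code): an operand backed by an op that folds to a constant attribute becomes a kConstant XlaExpression, and everything else falls through to the XlaOp path handled next.

#include "mlir/IR/Attributes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h"
#include "tensorflow/compiler/tf2xla/xla_expression.h"
#include "tensorflow/core/framework/tensor.h"

// Returns true and fills `expr` when `operand` is defined by an op that
// folds to a constant attribute (e.g. tf.Const); otherwise does nothing.
static bool TryMakeConstantExpression(mlir::Value operand,
                                      tensorflow::XlaExpression* expr) {
  mlir::ElementsAttr const_attr;
  mlir::Operation* defining_op = operand.getDefiningOp();
  if (!defining_op ||
      !mlir::matchPattern(defining_op, mlir::m_Constant(&const_attr)))
    return false;

  tensorflow::Tensor tensor;
  if (!tensorflow::ConvertToTensor(const_attr, &tensor).ok()) return false;
  *expr = tensorflow::XlaExpression::Constant(tensor);
  return true;
}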
+ auto xla_op_or = hlo_builder_.MakeXlaOp(operand); + if (!xla_op_or.ok()) { + op->emitRemark() << "skipping legalization due to " + << xla_op_or.status().ToString(); + return tensorflow::XlaExpression::Invalid(); + } + ::xla::XlaOp xla_op = xla_op_or.ValueOrDie(); + + tensorflow::DataType dtype; + auto status = tensorflow::ConvertToDataType(operand.getType(), &dtype); + if (!status.ok()) { + op->emitRemark() << "skipping legalization due to " << status.ToString(); + return tensorflow::XlaExpression::Invalid(); + } + return tensorflow::XlaExpression::XlaOp(xla_op, dtype); +} + class LegalizeTF : public PassWrapper { public: LegalizeTF() = default; diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc index bdee1b77cff..43c0911a4a6 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc @@ -19,7 +19,7 @@ limitations under the License. #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "absl/memory/memory.h" #include "llvm/ADT/ArrayRef.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" // from @llvm-project +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Transforms/FoldUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/xla/transforms/passes.h" diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc index e6f3ac02d4f..f0eb3cc1a0f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_gpu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "llvm/ADT/ArrayRef.h" #include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project @@ -112,7 +112,7 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { auto step = rewriter.create( loc, rewriter.getIndexType(), rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); - auto loop = rewriter.create(loc, zero, upper, step); + auto loop = rewriter.create(loc, zero, upper, step); rewriter.setInsertionPointToStart(loop.getBody()); // Compute memrefs for the value to reduce. This makes it easier to just @@ -173,8 +173,7 @@ struct LhloLegalizeToGpu : public PassWrapper { OwningRewritePatternList patterns; ConversionTarget target(getContext()); target.addLegalDialect(); + gpu::GPUDialect, scf::SCFDialect, XlaLhloDialect>(); target.addIllegalOp(); auto func = getFunction(); patterns.insert(func.getContext()); diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc index 54b3acd3787..734a75a4307 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project @@ -61,15 +61,15 @@ Value ApplySingleResultLhloCode(Location loc, ValueRange operands, // Converts a block with LHLO ops and with signature: // ^bb(%lhs: memref, %rhs: memref, %res: memref): -// into a reduction operator of loop.reduce by doing buffer allocation for -// scalar arguments and the result of `loop.reduce` to make it compatible with +// into a reduction operator of scf.reduce by doing buffer allocation for +// scalar arguments and the result of `scf.reduce` to make it compatible with // LHLO ops. -void ConvertToReductionOperator(Location loc, loop::ReduceOp reduce_op, +void ConvertToReductionOperator(Location loc, scf::ReduceOp reduce_op, Block* lhlo_block, OpBuilder* b) { Block& loop_reduce_op_body = reduce_op.reductionOperator().front(); OpBuilder::InsertionGuard guard(*b); b->setInsertionPointToStart(&loop_reduce_op_body); - b->create( + b->create( loc, ApplySingleResultLhloCode(loc, loop_reduce_op_body.getArguments(), lhlo_block, b)); } @@ -136,9 +136,9 @@ MappedIvs MapWindowIvsToInput(OpTy op, ValueRange ivs, ValueRange window_ivs, return mapped_ivs; } -// Returns loop::Parallel over a shaped value with static or dynamic shape. -loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, - OpBuilder* b) { +// Returns scf::Parallel over a shaped value with static or dynamic shape. +scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, + OpBuilder* b) { Value zero = b->create(loc, 0); Value one = b->create(loc, 1); @@ -151,10 +151,10 @@ loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, lower.push_back(zero); step.push_back(one); } - return b->create(loc, lower, upper, step); + return b->create(loc, lower, upper, step); } -// Converts `xla_lhlo.ReduceOp` into two loop::ParallelOp and a loop::ReduceOp. +// Converts `xla_lhlo.ReduceOp` into two scf::ParallelOp and a scf::ReduceOp. // The outper `ParallelOp` refers to the parallel loops if there are // any. The inner `ParalleOp` refers to the reduction loops and `ReduceOp` // contains the reduction operator. 
@@ -170,10 +170,10 @@ loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, // is roughly converted into: // // %init = load %init_buf[] : memref -// loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) { -// %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) { +// scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) { +// %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) { // %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32> -// loop.reduce(%elem_to_reduce) { +// scf.reduce(%elem_to_reduce) { // ^bb0(%elem: f32, %acc: f32): // no predecessors // elem_buf = alloc() : memref // store %elem, elem_buf[] : memref @@ -181,11 +181,11 @@ loop::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value, // store %acc, acc_buf[] : memref // // %acc_result = load acc_buf[] : memref -// loop.reduce.return %acc_result : f32 +// scf.reduce.return %acc_result : f32 // } : f32 -// loop.yield +// scf.yield // } : f32 -// loop.yield +// scf.yield // } class ReduceOpConverter : public OpConversionPattern { public: @@ -197,7 +197,7 @@ class ReduceOpConverter : public OpConversionPattern { // TODO(b/137624192) Implement variadic reduce. if (xla_reduce_op.out().size() != 1) return failure(); - loop::ReduceOp reduce_op = + scf::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops(xla_reduce_op, &rewriter); ConvertToReductionOperator(xla_reduce_op.getLoc(), reduce_op, &xla_reduce_op.body().front(), &rewriter); @@ -206,26 +206,26 @@ class ReduceOpConverter : public OpConversionPattern { } private: - // Creates nested `loop.parallel` ops with `loop.reduce`. The outer ParallelOp + // Creates nested `scf.parallel` ops with `scf.reduce`. The outer ParallelOp // refers to the parallel dimensions of `xla_reduce_op` if any and the inner - // ParallelOp refers to the reduction dimensions. The loop.reduce op is + // ParallelOp refers to the reduction dimensions. The scf.reduce op is // returned. // // If the reduction argument is a memref<100x10x5xf32> and the // reduction is performed along dimension 1 then this method will generate // // %init = load %init_buf[] : memref - // loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) { - // %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) { + // scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) { + // %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) { // %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32> - // loop.reduce(%elem_to_reduce) { + // scf.reduce(%elem_to_reduce) { // // } : f32 - // loop.yield + // scf.yield // } : f32 - // loop.yield + // scf.yield // } - loop::ReduceOp CreateReduceOpInNestedParallelLoops( + scf::ReduceOp CreateReduceOpInNestedParallelLoops( xla_lhlo::ReduceOp xla_reduce_op, ConversionPatternRewriter* rewriter) const { auto loc = xla_reduce_op.getLoc(); @@ -254,13 +254,13 @@ class ReduceOpConverter : public OpConversionPattern { SmallVector init_value = { rewriter->create(loc, *xla_reduce_op.init_values().begin())}; // Outer ParallelOp is not needed if it is a reduction across all dims. 
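The loop construction shared by these converters (MakeLoopOverShape above, and the outer/inner loops created just below) follows one recipe: constant-index bounds per dimension feeding an scf.parallel. A hedged sketch for the static-shape case; the real helper also handles dynamic dimensions via std.dim.

#include "llvm/ADT/SmallVector.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/StandardTypes.h"

// Build an scf.parallel that visits every element of a statically shaped
// memref: lower bound 0, upper bound = dim size, step 1 for each dimension.
static mlir::scf::ParallelOp MakeLoopOverStaticShape(mlir::Location loc,
                                                     mlir::Value memref,
                                                     mlir::OpBuilder* b) {
  auto type = memref.getType().cast<mlir::MemRefType>();
  mlir::Value zero = b->create<mlir::ConstantIndexOp>(loc, 0);
  mlir::Value one = b->create<mlir::ConstantIndexOp>(loc, 1);

  llvm::SmallVector<mlir::Value, 4> lower, upper, step;
  for (int64_t dim : type.getShape()) {
    lower.push_back(zero);
    upper.push_back(b->create<mlir::ConstantIndexOp>(loc, dim));
    step.push_back(one);
  }
  // Without init values the loop yields no results; with init values (as in
  // the reduction converters) it additionally carries the reduced scalars.
  return b->create<mlir::scf::ParallelOp>(loc, lower, upper, step);
}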
- loop::ParallelOp outer; + scf::ParallelOp outer; if (!parallel_lower.empty()) { - outer = rewriter->create(loc, parallel_lower, - parallel_upper, parallel_step); + outer = rewriter->create(loc, parallel_lower, + parallel_upper, parallel_step); rewriter->setInsertionPointToStart(outer.getBody()); } - loop::ParallelOp inner = rewriter->create( + scf::ParallelOp inner = rewriter->create( loc, reduce_lower, reduce_upper, reduce_step, init_value); Value reduction_result = *inner.getResults().begin(); @@ -294,7 +294,7 @@ class ReduceOpConverter : public OpConversionPattern { rewriter->setInsertionPointToStart(inner.getBody()); Value elem = rewriter->create( loc, *xla_reduce_op.operands().begin(), indices); - return rewriter->create(loc, elem); + return rewriter->create(loc, elem); } }; @@ -314,8 +314,8 @@ class ReduceOpConverter : public OpConversionPattern { // accumulator = reduction_operator(output[O], value) // output[O] = accumulator // -// Converts `xla_lhlo.ReduceWindowOp` into two loop::ParallelOp and a -// loop::ReduceOp. +// Converts `xla_lhlo.ReduceWindowOp` into two scf::ParallelOp and a +// scf::ReduceOp. // The outper `ParallelOp` refers to the parallel loops that traverese output // buffer. The inner `ParalleOp` refers to the reduction loops that traverse // reduction windows and `ReduceOp` contains the reduction operator. @@ -341,20 +341,20 @@ class ReduceOpConverter : public OpConversionPattern { // is roughly converted into: // // %neutral_elem = load %init_buf[] : memref -// loop.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) { -// %result = loop.parallel (%iw, %jw) = (%c0, %c0) +// scf.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) { +// %result = scf.parallel (%iw, %jw) = (%c0, %c0) // to (%c3, %c3) step (%c1, %c1) neutral_elem (%0) -> f32 { // %in_bounds = // %elem = load %operand[%computed_i, %computed_j] // %elem_or_neutral = select %in_bounds, %elem, %neutral_elem : f32 -// loop.reduce(%elem_to_reduce) : f32 { +// scf.reduce(%elem_to_reduce) : f32 { // ^bb0(%arg7: f32, %arg8: f32): // // } -// loop.yield +// scf.yield // } // store %result, %output_buffer[%i, %j] : memref<56x56xf32> -// loop.yield +// scf.yield // } // return // } @@ -366,12 +366,12 @@ class ReduceWindowOpConverter LogicalResult matchAndRewrite( xla_lhlo::ReduceWindowOp xla_reduce_window_op, ArrayRef /*args*/, ConversionPatternRewriter& rewriter) const final { - loop::ParallelOp output_loop, window_loop; + scf::ParallelOp output_loop, window_loop; std::tie(output_loop, window_loop) = CreateParallelLoopsToTraverseOutputAndWindow(xla_reduce_window_op, &rewriter); - loop::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops( + scf::ReduceOp reduce_op = CreateReduceOpInNestedParallelLoops( xla_reduce_window_op, output_loop, window_loop, &rewriter); ConvertToReductionOperator(xla_reduce_window_op.getLoc(), reduce_op, @@ -381,7 +381,7 @@ class ReduceWindowOpConverter } private: - std::pair + std::pair CreateParallelLoopsToTraverseOutputAndWindow( xla_lhlo::ReduceWindowOp xla_reduce_window_op, ConversionPatternRewriter* rewriter) const { @@ -405,7 +405,7 @@ class ReduceWindowOpConverter window_upper.push_back( rewriter->create(loc, window_dim.getSExtValue())); } - auto window_loop = rewriter->create( + auto window_loop = rewriter->create( loc, window_lower, window_upper, window_step, init_value); Value reduction_result = *window_loop.getResults().begin(); @@ -414,9 +414,9 @@ class ReduceWindowOpConverter return std::make_pair(output_loop, window_loop); } - loop::ReduceOp 
CreateReduceOpInNestedParallelLoops( + scf::ReduceOp CreateReduceOpInNestedParallelLoops( xla_lhlo::ReduceWindowOp xla_reduce_window_op, - loop::ParallelOp output_loop, loop::ParallelOp window_loop, + scf::ParallelOp output_loop, scf::ParallelOp window_loop, ConversionPatternRewriter* rewriter) const { rewriter->setInsertionPointToStart(window_loop.getBody()); auto loc = xla_reduce_window_op.getLoc(); @@ -436,20 +436,20 @@ class ReduceWindowOpConverter xla_reduce_window_op, output_loop.getInductionVars(), window_loop.getInductionVars(), rewriter); - auto elem_or_init = rewriter->create( + auto elem_or_init = rewriter->create( loc, xla_operand_type.getElementType(), mapped_ivs.in_bounds, /*withElseRegion=*/true); OpBuilder then_builder = elem_or_init.getThenBodyBuilder(); Value elem = then_builder.create( loc, xla_reduce_window_op.operand(), mapped_ivs.ivs); - then_builder.create(loc, elem); + then_builder.create(loc, elem); OpBuilder else_builder = elem_or_init.getElseBodyBuilder(); - else_builder.create(loc, *window_loop.initVals().begin()); + else_builder.create(loc, *window_loop.initVals().begin()); - return rewriter->create(loc, - *elem_or_init.results().begin()); + return rewriter->create(loc, + *elem_or_init.results().begin()); } }; @@ -457,16 +457,16 @@ class ReduceWindowOpConverter // https://www.tensorflow.org/xla/operation_semantics#selectandscatter // // Pseudocode: -// loop.parallel(coordinates O in the output): +// scf.parallel(coordinates O in the output): // output[O] = init -// loop.parallel(coordinates S in the source): +// scf.parallel(coordinates S in the source): // selected_ivs = 0 // selected_val = 0 // initialized_flag = false -// loop.for (first dim W_1 in the window) +// scf.for (first dim W_1 in the window) // iter_args (selected_ivs, selected_val, initialized_flag): // ... 
-// loop.for (last dim W_N in the window): +// scf.for (last dim W_N in the window): // iter_args (selected_ivs, selected_val, initialized_flag): // I = S * stride + W - pad_low // if I within bounds of operand: @@ -490,7 +490,7 @@ class SelectAndScatterOpConverter ConversionPatternRewriter& rewriter) const final { auto loc = s_and_s_op.getLoc(); InitializeOutput(s_and_s_op, &rewriter); - loop::ParallelOp loop_over_src = + scf::ParallelOp loop_over_src = MakeLoopOverShape(loc, s_and_s_op.source(), &rewriter); rewriter.setInsertionPointToStart(loop_over_src.getBody()); @@ -520,7 +520,7 @@ class SelectAndScatterOpConverter auto loc = s_and_s_op.getLoc(); Value init_value = b->create(loc, s_and_s_op.init_value()); - loop::ParallelOp loop_over_output = + scf::ParallelOp loop_over_output = MakeLoopOverShape(loc, s_and_s_op.out(), b); OpBuilder::InsertionGuard guard(*b); b->setInsertionPointToStart(loop_over_output.getBody()); @@ -531,10 +531,10 @@ class SelectAndScatterOpConverter struct WindowLoops { SmallVector selected_ivs; SmallVector window_ivs; - loop::ForOp inner_loop; + scf::ForOp inner_loop; }; WindowLoops InsertWindowLoops(xla_lhlo::SelectAndScatterOp s_and_s_op, - loop::ParallelOp loop_over_src, + scf::ParallelOp loop_over_src, OpBuilder* b) const { auto loc = s_and_s_op.getLoc(); Value zero = b->create(loc, 0); @@ -558,12 +558,12 @@ class SelectAndScatterOpConverter s_and_s_op.window_dimensions()->getIntValues()) { Value upper = b->create(loc, window_dim.getSExtValue()); result.inner_loop = - b->create(loc, zero, upper, one, iter_args); + b->create(loc, zero, upper, one, iter_args); if (b->getInsertionBlock() == loop_over_src.getBody()) { ip = b->saveInsertionPoint(); result.selected_ivs = result.inner_loop.getResults().take_front(rank); } else { - b->create(loc, result.inner_loop.getResults()); + b->create(loc, result.inner_loop.getResults()); } b->setInsertionPointToStart(result.inner_loop.getBody()); iter_args = ValueRange{result.inner_loop.getRegionIterArgs()}; @@ -599,7 +599,7 @@ class SelectAndScatterOpConverter }; SmallVector SelectIvs(xla_lhlo::SelectAndScatterOp s_and_s_op, - loop::ParallelOp loop_over_src, + scf::ParallelOp loop_over_src, OpBuilder* b) const { auto loc = s_and_s_op.getLoc(); @@ -614,7 +614,7 @@ class SelectAndScatterOpConverter IterArgs ivs_val_flag(window_loops.inner_loop.getRegionIterArgs()); - auto if_in_bounds = inner_loop_b.create( + auto if_in_bounds = inner_loop_b.create( loc, window_loops.inner_loop.getResultTypes(), mapped_ivs.in_bounds, /*withElseRegion=*/true); @@ -623,16 +623,16 @@ class SelectAndScatterOpConverter OpBuilder in_bounds_then_b = if_in_bounds.getThenBodyBuilder(); auto select_or_init_results = SelectOrInitialize( s_and_s_op, mapped_ivs.ivs, &ivs_val_flag, &in_bounds_then_b); - in_bounds_then_b.create(loc, select_or_init_results); + in_bounds_then_b.create(loc, select_or_init_results); } // Case when we are in the pad. 
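The iter_args plumbing used by InsertWindowLoops and SelectIvs, including the pad case handled just below (which simply re-yields the incoming state), follows the standard scf.for/scf.if pattern. The sketch here threads a running float maximum; `buffer` is assumed to be a 1-D float memref and all names are illustrative placeholders rather than the pass's own code.

#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"

// Compute max(init, buffer[0..upper)) by carrying the running maximum in the
// scf.for iter_args and selecting per element with an scf.if.
static mlir::Value RunningMax(mlir::Location loc, mlir::Value buffer,
                              mlir::Value upper, mlir::Value init,
                              mlir::OpBuilder* b) {
  mlir::Value zero = b->create<mlir::ConstantIndexOp>(loc, 0);
  mlir::Value one = b->create<mlir::ConstantIndexOp>(loc, 1);

  auto loop =
      b->create<mlir::scf::ForOp>(loc, zero, upper, one, mlir::ValueRange(init));
  mlir::OpBuilder::InsertionGuard guard(*b);
  b->setInsertionPointToStart(loop.getBody());

  mlir::Value acc = *loop.getRegionIterArgs().begin();
  mlir::Value elem =
      b->create<mlir::LoadOp>(loc, buffer, loop.getInductionVar());
  mlir::Value take_new =
      b->create<mlir::CmpFOp>(loc, mlir::CmpFPredicate::OGT, elem, acc);

  // scf.if with a result: each branch must end in scf.yield.
  auto if_op = b->create<mlir::scf::IfOp>(loc, acc.getType(), take_new,
                                          /*withElseRegion=*/true);
  mlir::OpBuilder then_b = if_op.getThenBodyBuilder();
  then_b.create<mlir::scf::YieldOp>(loc, elem);
  mlir::OpBuilder else_b = if_op.getElseBodyBuilder();
  else_b.create<mlir::scf::YieldOp>(loc, acc);

  // Forward the selected value to the next iteration of the scf.for.
  b->create<mlir::scf::YieldOp>(loc, if_op.getResults());
  return loop.getResult(0);
}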
{ OpBuilder in_bounds_else_b = if_in_bounds.getElseBodyBuilder(); - in_bounds_else_b.create(loc, ivs_val_flag.to_vector()); + in_bounds_else_b.create(loc, ivs_val_flag.to_vector()); } - inner_loop_b.create(loc, if_in_bounds.getResults()); + inner_loop_b.create(loc, if_in_bounds.getResults()); return window_loops.selected_ivs; } @@ -647,8 +647,8 @@ class SelectAndScatterOpConverter Value operand_elem = b->create(loc, s_and_s_op.operand(), operand_ivs); auto if_init = - b->create(loc, iter_arg_types, ivs_val_flag->is_init(), - /*withElseRegion=*/true); + b->create(loc, iter_arg_types, ivs_val_flag->is_init(), + /*withElseRegion=*/true); // Init == true, i.e. iter args are already initialized with a selected // element in boundaries of the operand. Select function has to be computed // here. @@ -660,32 +660,31 @@ class SelectAndScatterOpConverter ApplySingleResultLhloCode(loc, {operand_elem, ivs_val_flag->value()}, &lhlo_select, &if_init_then_b); - auto if_pred = - if_init_then_b.create(loc, iter_arg_types, pred, - /*withElseRegion=*/true); + auto if_pred = if_init_then_b.create(loc, iter_arg_types, pred, + /*withElseRegion=*/true); // Pred == true, therefore pack newly selected ivs, val and init flag back // to iter_args and return. { OpBuilder if_pred_then_b = if_pred.getThenBodyBuilder(); - if_pred_then_b.create( + if_pred_then_b.create( loc, IterArgs{operand_ivs, operand_elem, true_i1}.to_vector()); } // Pred == false, therefore return old iter_args. { OpBuilder if_pred_else_b = if_pred.getElseBodyBuilder(); - if_pred_else_b.create(loc, ivs_val_flag->to_vector()); + if_pred_else_b.create(loc, ivs_val_flag->to_vector()); } - if_init_then_b.create(loc, if_pred.getResults()); + if_init_then_b.create(loc, if_pred.getResults()); } // Init == false, i.e. only pad was visited before and this is the first // element in the boundaries of the operand. 
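The IterArgs helper referenced above exists because scf.for and scf.if only thread flat value lists. A minimal sketch of that packing scheme, assuming the layout used in SelectAndScatterOpConverter (selected indices first, then the selected value, then the init flag):

#include "llvm/ADT/SmallVector.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/Value.h"

// Structured view over the flat loop-carried state of the window loops.
// Flat order: [iv_0, ..., iv_{rank-1}, selected_value, initialized_flag].
struct LoopCarriedState {
  explicit LoopCarriedState(mlir::ValueRange flat) {
    size_t rank = flat.size() - 2;
    ivs.assign(flat.begin(), flat.begin() + rank);
    value = flat[rank];
    is_init = flat[rank + 1];
  }

  // Re-flatten for use as scf.yield operands or scf.if results.
  llvm::SmallVector<mlir::Value, 4> to_vector() const {
    llvm::SmallVector<mlir::Value, 4> packed(ivs.begin(), ivs.end());
    packed.push_back(value);
    packed.push_back(is_init);
    return packed;
  }

  llvm::SmallVector<mlir::Value, 2> ivs;
  mlir::Value value;
  mlir::Value is_init;
};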
{ OpBuilder if_init_else_b = if_init.getElseBodyBuilder(); - if_init_else_b.create( + if_init_else_b.create( loc, IterArgs{operand_ivs, operand_elem, true_i1}.to_vector()); } return if_init.getResults(); @@ -708,7 +707,7 @@ struct LhloLegalizeToParallelLoops ConversionTarget target(getContext()); target.addLegalDialect(); + scf::SCFDialect, XlaLhloDialect>(); target.addIllegalOp(); diff --git a/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h b/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h index 6178434c8bb..fed21e9bafc 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h @@ -63,6 +63,7 @@ MAP_HLO_TO_LHLO(RemOp); MAP_HLO_TO_LHLO(RsqrtOp); MAP_HLO_TO_LHLO(SelectOp); MAP_HLO_TO_LHLO(SignOp); +MAP_HLO_TO_LHLO(SinOp); MAP_HLO_TO_LHLO(SqrtOp); MAP_HLO_TO_LHLO(SubOp); MAP_HLO_TO_LHLO(TanhOp); diff --git a/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h b/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h index 8296011bf54..c317dc36b3c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_xla_to_scalar_op.h @@ -227,6 +227,28 @@ inline Value MapLhloOpToStdScalarOp( loc, result_types, args, b); } +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, + b); +} + +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, b); +} + +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}(loc, result_types, args, b); +} + template <> inline Value MapLhloOpToStdScalarOp( Location loc, ArrayRef result_types, ArrayRef args, @@ -259,11 +281,9 @@ inline Value MapLhloOpToStdScalarOp( // No conversion is needed for the same width integers return args.front(); } - // TODO(dfki-ehna): Add other primitive type conversions - // if (mlir::FpToSiOp::areCastCompatible(sourceType, targetType)) { - // return b.create(loc, result_types, - // args,mlir::None); - // } + if (mlir::FPToSIOp::areCastCompatible(sourceType, targetType)) { + return b->create(loc, result_types, args, mlir::None); + } return nullptr; } @@ -275,6 +295,14 @@ inline Value MapLhloOpToStdScalarOp( loc, result_types, args, b); } +template <> +inline Value MapLhloOpToStdScalarOp( + Location loc, ArrayRef result_types, ArrayRef args, + OpBuilder* b) { + return MapLhloOpToStdScalarOpImpl{}( + loc, result_types, args, b); +} + /// Implements the conversion of XLA op to scalar op (to use within region of a /// linalg.generic op) for compare-select style operations like min/max. 
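The new float-to-signed-integer branch in the ConvertOp mapping above reduces to a cast-compatibility check followed by emitting std.fptosi. A hedged standalone sketch, with the dispatch boilerplate of MapLhloOpToStdScalarOp omitted:

#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"

// Lower an element-wise float->signed-int convert to std.fptosi, guarded by
// the same cast-compatibility check the pass now uses. Returns a null Value
// when the combination is unsupported so the caller can try another lowering.
static mlir::Value EmitFloatToSignedIntCast(mlir::Location loc,
                                            mlir::Type result_type,
                                            mlir::Value arg,
                                            mlir::OpBuilder* b) {
  mlir::Type source_type = arg.getType();
  if (mlir::FPToSIOp::areCastCompatible(source_type, result_type))
    return b->create<mlir::FPToSIOp>(loc, result_type, arg, mlir::None);
  return nullptr;
}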
template diff --git a/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc b/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc index a4ffa57957e..bf666400900 100644 --- a/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc +++ b/tensorflow/compiler/mlir/xla/transforms/materialize_broadcasts.cc @@ -50,12 +50,6 @@ static DenseIntElementsAttr GetI64ElementsAttrForSeq(int start, int end, template bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, Value *out_lhs, Value *out_rhs) { - if (!op.broadcast_dimensions().hasValue()) { - // Note: the op may still have an implicit broadcast on it, such as - // for (tensor<1xf32>, tensor<4xf32>). - return false; - } - // Insert BroadcastInDimOps for the left-hand-side and right-hand-side args, // replacing the original LHS and RHS args in the source op with the results // of the broadcasts. @@ -79,25 +73,7 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, auto lhs_rank = lhs_ranked_type.getRank(); auto rhs_rank = rhs_ranked_type.getRank(); - - // Set broadcast_dimensions to [0, ..., rank] for the higher rank arg. - // Use the original op.broadcast_dimensions for the lower rank arg. - auto higher_rank_broadcast_dims = - GetI64ElementsAttrForSeq(0, std::max(lhs_rank, rhs_rank), rewriter); - DenseIntElementsAttr lhs_broadcast_dims; - DenseIntElementsAttr rhs_broadcast_dims; - if (lhs_rank > rhs_rank) { - lhs_broadcast_dims = higher_rank_broadcast_dims; - rhs_broadcast_dims = op.broadcast_dimensions().getValue(); - } else if (lhs_rank < rhs_rank) { - lhs_broadcast_dims = op.broadcast_dimensions().getValue(); - rhs_broadcast_dims = higher_rank_broadcast_dims; - } else { - // This shouldn't happen for legal ops. If the broadcast_dimensions - // attribute is set, the ranks should be different. - // TODO(scotttodd): Add a custom verification for ops and assert here. - return false; - } + ArrayRef op_shape = op_ranked_type.getShape(); // BroadcastInDimOp must have the same element type for operands and results, // so preserve the original output shape and the original input element type. 
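The rewritten CreateStaticBroadcastsForBinaryOp below applies the same per-operand rule to both sides: insert a broadcast_in_dim only when the operand's shape differs from the result shape. A hedged sketch of that rule for a single operand; `broadcast_dims` is assumed to be either the identity sequence 0..rank-1 (for the side already at full rank) or the op's broadcast_dimensions attribute (for the lower-rank side).

#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/StandardTypes.h"
#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h"

// Broadcast `operand` up to the result shape of the binary op, preserving its
// element type; leave it untouched when it already matches.
static mlir::Value BroadcastToResultShape(
    mlir::Location loc, mlir::Value operand,
    mlir::RankedTensorType result_type,
    mlir::DenseIntElementsAttr broadcast_dims, mlir::OpBuilder* b) {
  auto operand_type = operand.getType().cast<mlir::RankedTensorType>();
  if (operand_type.getShape() == result_type.getShape())
    return operand;  // Already at the result shape: no broadcast needed.

  auto broadcasted_type = mlir::RankedTensorType::get(
      result_type.getShape(), operand_type.getElementType());
  return b->createOrFold<mlir::xla_hlo::BroadcastInDimOp>(
      loc, broadcasted_type, operand, broadcast_dims);
}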
@@ -105,16 +81,32 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, // broadcast_in_dim (tensor<1x4xf32>) -> tensor<1x4xf32> // broadcast_in_dim (tensor<4xf32>) -> tensor<1x4xf32> // SrcOp (tensor<1x4xf32>, tensor<1x4xf32>) -> tensor<1x4xi1> - ArrayRef op_shape = op_ranked_type.getShape(); - auto lhs_type = - RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); - auto rhs_type = - RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + if (lhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, lhs_rank, rewriter); + if (lhs_rank < rhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } - *out_lhs = rewriter->createOrFold(op.getLoc(), lhs_type, - lhs, lhs_broadcast_dims); - *out_rhs = rewriter->createOrFold(op.getLoc(), rhs_type, - rhs, rhs_broadcast_dims); + lhs = + rewriter->createOrFold(op.getLoc(), type, lhs, attr); + } + + if (rhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, rhs_rank, rewriter); + if (rhs_rank < lhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } + + rhs = + rewriter->createOrFold(op.getLoc(), type, rhs, attr); + } + + *out_lhs = lhs; + *out_rhs = rhs; return true; } @@ -359,9 +351,15 @@ struct CompareWithBroadcastConvert : public OpRewritePattern { void SetupMaterializeBroadcastsLegality(MLIRContext *context, ConversionTarget *conversionTarget) { -#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ - conversionTarget->addDynamicallyLegalOp( \ - [](OpType op) { return !op.broadcast_dimensions().hasValue(); }); +#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ + conversionTarget->addDynamicallyLegalOp([](OpType op) { \ + if (op.broadcast_dimensions().hasValue()) return false; \ + auto l = op.lhs().getType().cast(); \ + auto r = op.rhs().getType().cast(); \ + if (!l.hasRank() || !r.hasRank()) return false; \ + return l.getShape() == r.getShape(); \ + }); + // Binary elementwise ops. ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(AddOp); ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(Atan2Op); diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index 2d0164981a3..39375e210d5 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -81,8 +81,8 @@ std::unique_ptr> createLegalizeToGpuPass(); // Fuses linalg ops obtained after LHLO lowering. To enable fusion, // operations are first tiled. // -// When 'use_parallel_loops' is set, the tiling will use loop.parallel -// operations. Otherwise, loop.for operations are used. +// When 'use_parallel_loops' is set, the tiling will use scf.parallel +// operations. Otherwise, scf.for operations are used. // // 'tile_sizes' provides the tile sizes to use for tiling. If the linalg // operation has more dimensions than tile sizes provided, 1 is used as diff --git a/tensorflow/compiler/mlir/xla/transforms/rewriters.h b/tensorflow/compiler/mlir/xla/transforms/rewriters.h index ad81cda19b9..9cde6f84474 100644 --- a/tensorflow/compiler/mlir/xla/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/xla/transforms/rewriters.h @@ -23,6 +23,7 @@ limitations under the License. 
#include "mlir/Transforms/DialectConversion.h" // from @llvm-project namespace mlir { +class BufferAssignmentPlacer; namespace xla_hlo { // Collection of rewrite patterns for lowering a general dot product. @@ -38,9 +39,9 @@ void PopulateXlaToStdPatterns(OwningRewritePatternList *patterns, MLIRContext *ctx); // Collection of rewrite patterns for lowering of HLO to LHLO dialect. -void populateHLOToLHLOConversionPattern(MLIRContext *context, - OwningRewritePatternList *patterns); - +void populateHLOToLHLOConversionPattern( + MLIRContext *context, BufferAssignmentPlacer *bufferAssignment, + TypeConverter *converter, OwningRewritePatternList *patterns); // Collection of rewrite patterns for lowering of HLO to Linalg dialect. void populateHLOToLinalgConversionPattern(MLIRContext *context, OwningRewritePatternList *patterns); diff --git a/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc b/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc index 8976bd5b7d2..71441656c08 100644 --- a/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc +++ b/tensorflow/compiler/mlir/xla/transforms/test_infer_shaped_type_pass.cc @@ -38,7 +38,8 @@ struct InferReturnTypeComponentsPattern : public RewritePattern { SmallVector components; if (failed(defining_op_int.inferReturnTypeComponents( op->getContext(), op->getLoc(), defining_op->getOperands(), - defining_op->getAttrs(), defining_op->getRegions(), components))) { + defining_op->getAttrDictionary(), defining_op->getRegions(), + components))) { return failure(); } diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc index ee75ceac2d1..a12bd9e7c1a 100644 --- a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.h" + #include #include @@ -72,15 +74,6 @@ StatusOr> HloModuleFromProto( // dialect. class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { public: - // Populate the MLIR `module` with the computation from the `hlo_module` using - // the provided buffer `assignment`. The returned `Status` indicates success - // or failure in the conversion. - static Status EmitModule(const BufferAssignment& assignment, - const HloModule& hlo_module, ModuleOp module) { - return LhloDialectEmitter(assignment, hlo_module, module).Run(); - } - - private: // Main entry point of the processing: after this call the MLIR ModuleOp is // populated with the computation from the HloModule. The returned `Status` // indicates success or failure in the conversion. @@ -94,24 +87,13 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { builder_(module.getContext()), i8_type_(builder_.getIntegerType(8)) {} - Status DefaultAction(HloInstruction* hlo) final { - return ::xla::Unimplemented("unsupported HLO %s", hlo->name()); - } + private: + Status DefaultAction(HloInstruction* instr) final; // Computation parameters don't need any specific handling when they are // visited, they are already processed when we enter a new computation. 
Status HandleParameter(HloInstruction* instr) final { return Status::OK(); } - // HLO Copy is translated 1:1 to an lhlo.copy operation. - Status HandleCopy(HloInstruction* instr) final { - TF_ASSIGN_OR_RETURN(Value source, GetOrCreateView(instr->operand(0))); - TF_ASSIGN_OR_RETURN(Value dest, GetOrCreateView(instr)); - if (source != dest) - builder_.create(getLocation(instr), - llvm::ArrayRef{}, source, dest); - return Status::OK(); - } - // Helper function to create view in a buffer for a given slice. The view is // cached in the `slices_` map. Value GetOrCreateView(const BufferAllocation::Slice& slice); @@ -160,6 +142,98 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { Type i8_type_; }; +Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { + llvm::SmallVector operands(instr->operand_count() + 1); + for (int arg_idx = 0; arg_idx < instr->operand_count(); ++arg_idx) { + TF_ASSIGN_OR_RETURN(operands[arg_idx], + GetOrCreateView(instr->operand(arg_idx))); + } + + TF_ASSIGN_OR_RETURN(operands.back(), GetOrCreateView(instr)); + Location loc = getLocation(instr); + ArrayRef> attrs; + ArrayRef rets{}; + + using ::xla::HloOpcode; + switch (instr->opcode()) { + case HloOpcode::kAbs: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kAdd: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kAnd: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kCeil: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kComplex: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kCopy: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kCos: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kDivide: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kExp: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kImag: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kLog: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kMaximum: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kMinimum: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kMultiply: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kNegate: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kReal: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kRemainder: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kRsqrt: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kSelect: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kSign: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kSqrt: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kSubtract: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + case HloOpcode::kTanh: + builder_.create(loc, rets, operands, attrs); + return Status::OK(); + default: + llvm::errs() << instr->ToString(); + return tensorflow::errors::Internal( + absl::StrCat("LHLO opcode ", 
::xla::HloOpcodeString(instr->opcode()), + " is not supported.")); + } + return Status::OK(); +} + Value LhloDialectEmitter::GetOrCreateView( const BufferAllocation::Slice& slice) { // Check if we already have a view for this slice, otherwise we need to create @@ -177,17 +251,15 @@ Value LhloDialectEmitter::GetOrCreateView( // Create the view for this slice size, possible with an affine map to model // the offset. The result is cached in the slices_ map. - SmallVector offset_map; - if (slice.offset()) { - offset_map.push_back(AffineMap::get( - /*dimCount=*/1, /*symbolCount=*/0, - {getAffineDimExpr(0, builder_.getContext()) + slice.offset()}, - builder_.getContext())); - } - auto slice_type = MemRefType::get({slice.size()}, i8_type_, offset_map); + // The std.view result type does not carry the static offset: this is not + // useful information. Rather, the view op must have the static offset. + auto slice_type = MemRefType::get({slice.size()}, i8_type_, {}); - auto slice_view = builder_.create( - alloc_buffer.getLoc(), slice_type, alloc_buffer, /*operands=*/llvm::None); + Value byte_shift = + builder_.create(alloc_buffer.getLoc(), slice.offset()); + auto slice_view = + builder_.create(alloc_buffer.getLoc(), slice_type, alloc_buffer, + byte_shift, /*sizes=*/ArrayRef{}); slices_.insert({slice_key, slice_view}); return slice_view; } @@ -203,9 +275,12 @@ StatusOr LhloDialectEmitter::GetOrCreateView( Value slice_view = GetOrCreateView(out_slice); TF_ASSIGN_OR_RETURN(Type out_type, ::xla::ConvertShapeToType( target_shape, builder_)); + Value byte_shift = + builder_.create(builder_.getUnknownLoc(), 0); if (slice_view.getType() != out_type) - slice_view = builder_.create(builder_.getUnknownLoc(), out_type, - slice_view, llvm::None); + slice_view = + builder_.create(builder_.getUnknownLoc(), out_type, slice_view, + byte_shift, /*sizes=*/ArrayRef{}); return slice_view; } @@ -334,8 +409,7 @@ Status ConvertModule(ModuleOp module, StringRef platform_name) { module.ensureTerminator(module.getBodyRegion(), builder, module.getLoc()); TF_RETURN_WITH_CONTEXT_IF_ERROR( - LhloDialectEmitter::EmitModule(*assignment, *optimized_hlo_module, - module), + HloToLhloModule(*assignment, *optimized_hlo_module, module), "converting HLO to LHLO"); return Status::OK(); @@ -372,6 +446,11 @@ std::unique_ptr> createXlaHloToLhloWithXlaPass() { return std::make_unique(); } +Status HloToLhloModule(const BufferAssignment& assignment, + const HloModule& hlo_module, ModuleOp module) { + return LhloDialectEmitter(assignment, hlo_module, module).Run(); +} + static PassRegistration registration( "xla-hlo-to-lhlo-with-xla", "Emit LHLO from HLO using the existing XLA implementation"); diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.h new file mode 100644 index 00000000000..1018bdbf408 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_to_lhlo_with_xla.h @@ -0,0 +1,34 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_XLA_HLO_TO_LHLO_WITH_XLA_H_ +#define TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_XLA_HLO_TO_LHLO_WITH_XLA_H_ + +#include "mlir/IR/Module.h" // from @llvm-project +#include "tensorflow/compiler/xla/service/buffer_assignment.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" + +namespace mlir { + +// Populate the MLIR `module` with the computation from the `hlo_module` using +// the provided buffer `assignment`. The returned `Status` indicates success +// or failure in the conversion. +tensorflow::Status HloToLhloModule(const xla::BufferAssignment& assignment, + const xla::HloModule& hlo_module, + ModuleOp module); + +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_XLA_HLO_TO_LHLO_WITH_XLA_H_ diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc index 1a206d5d8a3..799a20aa693 100644 --- a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc @@ -84,7 +84,8 @@ class PointwiseToLinalgConverter : public OpConversionPattern { emitError(loc, "lhlo to linalg conversion expects ranked args"); return failure(); } - if (!argType.getElementType().isSignlessIntOrFloat()) { + auto elemTy = argType.getElementType(); + if (!elemTy.isSignlessIntOrFloat() && !elemTy.template isa()) { return failure(); } @@ -284,34 +285,32 @@ class BroadcastInDimConverter broadcastOp.operand().getType().template cast(); unsigned nloops = resultType.getRank(); + // The input is a scalar, i.e. this is a scalar broadcast op. + if (operandType.getRank() == 0) { + return b->getAffineMapArrayAttr( + {AffineMap::get(nloops, /*symbolCount=*/0, b->getContext()), + b->getMultiDimIdentityMap(nloops)}); + } + auto operandShape = operandType.getShape(); SmallVector dimExprs; - AffineMap inputMap = AffineMap::get(b->getContext()); - { - dimExprs.reserve(nloops); + dimExprs.reserve(nloops); - if (broadcastOp.broadcast_dimensions()) { - for (const auto& broadcastDim : - enumerate(broadcastOp.broadcast_dimensions().getIntValues())) { - int size = broadcastDim.value().getSExtValue(); - // TODO(pifon): Add support for args with dynamic shapes for the case - // when a dimension of size 1 is broadcasted into dim of size N. - AffineExpr affineExpr = operandShape[broadcastDim.index()] == 1 - ? b->getAffineConstantExpr(0) - : b->getAffineDimExpr(size); - dimExprs.push_back(affineExpr); - } - } - if (dimExprs.empty()) { - // The input is a scalar, i.e. this is a scalar broadcast op. - inputMap = AffineMap::get(nloops, /*symbolCount=*/0, b->getContext()); - } else { - inputMap = AffineMap::get(nloops, /*symbolCount=*/0, dimExprs, - b->getContext()); + if (broadcastOp.broadcast_dimensions()) { + for (const auto& broadcastDim : + enumerate(broadcastOp.broadcast_dimensions().getIntValues())) { + int size = broadcastDim.value().getSExtValue(); + bool expansion_needed = operandShape[broadcastDim.index()] == 1 && + resultType.getShape()[size] != 1; + // TODO(pifon): Add support for args with dynamic shapes for the case + // when a dimension of size 1 is broadcasted into dim of size N. + dimExprs.push_back(expansion_needed ? 
b->getAffineConstantExpr(0) + : b->getAffineDimExpr(size)); } } return b->getAffineMapArrayAttr( - {inputMap, b->getMultiDimIdentityMap(nloops)}); + {AffineMap::get(nloops, /*symbolCount=*/0, dimExprs, b->getContext()), + b->getMultiDimIdentityMap(nloops)}); } }; @@ -620,21 +619,25 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, // TODO(ataei): Remove this pattern, CopyOp is folded away. PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, @@ -717,18 +720,23 @@ void populateHLOToLinalgConversionPattern(MLIRContext* context, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, - PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, + PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 538b0cf492d..ea4ba8dab6b 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -128,6 +128,7 @@ tf_xla_py_test( name = "adagrad_da_test", size = "small", srcs = ["adagrad_da_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -165,6 +166,7 @@ tf_xla_py_test( srcs = ["add_n_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. 
disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -225,6 +227,7 @@ tf_xla_py_test( name = "complex_div_test", size = "medium", srcs = ["complex_div_test.py"], + enable_mlir_bridge = True, enabled_backends = [ "cpu", "gpu", @@ -449,6 +452,7 @@ tf_xla_py_test( name = "clustering_test", size = "small", srcs = ["clustering_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -466,6 +470,7 @@ tf_xla_py_test( name = "concat_ops_test", size = "medium", srcs = ["concat_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "many_xla_args", @@ -488,6 +493,7 @@ tf_xla_py_test( name = "conv2d_test", size = "medium", srcs = ["conv2d_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 10, tags = [ @@ -510,6 +516,7 @@ tf_xla_py_test( name = "conv3d_test", size = "medium", srcs = ["conv3d_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 5, tags = [ @@ -555,6 +562,7 @@ tf_xla_py_test( name = "dynamic_slice_ops_test", size = "small", srcs = ["dynamic_slice_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -571,6 +579,7 @@ tf_xla_py_test( name = "einsum_op_test", size = "medium", srcs = ["einsum_op_test.py"], + enable_mlir_bridge = True, enabled_backends = [ "cpu", "gpu", @@ -592,6 +601,7 @@ tf_xla_py_test( name = "reshape_op_test", size = "small", srcs = ["reshape_op_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -663,6 +673,7 @@ tf_xla_py_test( name = "fifo_queue_test", size = "medium", srcs = ["fifo_queue_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -702,6 +713,7 @@ tf_xla_py_test( name = "slice_ops_test", size = "small", srcs = ["slice_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -737,6 +749,7 @@ tf_xla_py_test( name = "function_test", size = "small", srcs = ["function_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -881,6 +894,7 @@ tf_xla_py_test( name = "nary_ops_test", size = "small", srcs = ["nary_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -898,6 +912,7 @@ tf_xla_py_test( name = "nullary_ops_test", size = "small", srcs = ["nullary_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1220,6 +1235,7 @@ tf_xla_py_test( name = "stack_ops_test", size = "small", srcs = ["stack_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1280,6 +1296,7 @@ tf_xla_py_test( srcs = ["tensor_array_ops_test.py"], # TensorArray ops are not implemented in the on-demand compilation model yet. 
disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1308,6 +1325,7 @@ tf_xla_py_test( srcs = ["tensor_list_ops_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1326,6 +1344,7 @@ tf_xla_py_test( name = "ternary_ops_test", size = "medium", srcs = ["ternary_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1368,6 +1387,7 @@ tf_xla_py_test( size = "medium", srcs = ["fused_batchnorm_test.py"], python_version = "PY3", + shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip ], @@ -1501,6 +1521,7 @@ tf_xla_py_test( name = "data_format_ops_test", size = "small", srcs = ["data_format_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1735,6 +1756,7 @@ tf_xla_py_test( name = "placeholder_test", size = "small", srcs = ["placeholder_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1791,6 +1813,7 @@ tf_xla_py_test( name = "conv_node_name_test", size = "medium", srcs = ["conv_node_name_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 5, tags = [ @@ -1837,6 +1860,7 @@ tf_xla_py_test( name = "special_math_test", size = "medium", srcs = ["special_math_test.py"], + enable_mlir_bridge = True, shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index d9721a3c8ac..00ed6d83e2e 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -73,8 +73,6 @@ class BinaryOpsTest(xla_test.XLATestCase): self.assertAllCloseAccordingToType( result[i], expected[i], rtol=rtol, atol=atol) - @test_util.disable_mlir_bridge( - "F16 type is not supported in CreateDenseElementsAttrFromLiteral") def testFloatOps(self): for dtype in self.float_types: if dtype == dtypes.bfloat16.as_numpy_dtype: @@ -299,7 +297,6 @@ class BinaryOpsTest(xla_test.XLATestCase): ] self._testBinary(bitwise_ops.right_shift, lhs, rhs, expected=expected) - @test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testAdd(self): for dtype in self.numeric_types: self._testBinary( @@ -326,7 +323,6 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([3.0269620882574744, 3.3149631512242195], dtype=dtype)) - @test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testMultiply(self): for dtype in self.numeric_types: self._testBinary( @@ -390,7 +386,6 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([[16], [81]], dtype=dtype), rtol=rtol) - @test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testNumericOps(self): for dtype in self.numeric_types: self._testBinary( @@ -934,7 +929,6 @@ class BinaryOpsTest(xla_test.XLATestCase): expected = np.array([op(l, r) for l, r in zip(lhs, rhs)], dtype=np.bool) self._testBinary(op, lhs, rhs, expected=expected) - 
@test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testBroadcasting(self): """Tests broadcasting behavior of an operator.""" @@ -1230,6 +1224,8 @@ class BinaryOpsTest(xla_test.XLATestCase): [7, 7, 7, 7, 7, 7]], dtype=dtype)) + @test_util.disable_mlir_bridge( + "Requires concatenate op support in MlirHloBuilder") def testSymmetricMirrorPad(self): mirror_pad = lambda t, paddings: array_ops.pad(t, paddings, "SYMMETRIC") for dtype in self.numeric_types: @@ -1261,6 +1257,8 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([[0, 0], [0, 0]], dtype=np.int32), expected=np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype)) + @test_util.disable_mlir_bridge( + "Requires concatenate op support in MlirHloBuilder") def testReflectMirrorPad(self): mirror_pad = lambda t, paddings: array_ops.pad(t, paddings, "REFLECT") for dtype in self.numeric_types: @@ -1414,6 +1412,7 @@ class BinaryOpsTest(xla_test.XLATestCase): ], equality_test=self.ListsAreClose) + @test_util.disable_mlir_bridge("TODO(b/155097657): Debug incorrect answer") def testTile(self): for dtype in self.numeric_types: self._testBinary( @@ -1502,7 +1501,6 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([1, 0], dtype=np.int32), expected=np.array([[1 + 1j, 3 + 3j], [2 - 2j, 4 - 4j]], dtype=dtype)) - @test_util.disable_mlir_bridge("Enable tf.Cross Compilation") def testCross(self): for dtype in self.float_types: self._testBinary( @@ -1572,6 +1570,8 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([2, 1, 5], dtype=np.int32), expected=np.array([2, 3, 5], dtype=np.int32)) + @test_util.disable_mlir_bridge("Error handling") + def testBroadcastArgsError(self): with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, "Incompatible shapes"): self._testBinary(array_ops.broadcast_dynamic_shape, @@ -1579,6 +1579,8 @@ class BinaryOpsTest(xla_test.XLATestCase): np.array([4, 5, 6], dtype=np.int32), expected=None) + @test_util.disable_mlir_bridge( + "Requires BroadcastInDim method in MlirHloBuilder") def testBroadcastTo(self): for dtype in self.all_types: x = np.random.randint(0, high=100, size=[2, 3]) diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py index 10dd2d6542c..f35ded924d5 100644 --- a/tensorflow/compiler/tests/concat_ops_test.py +++ b/tensorflow/compiler/tests/concat_ops_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gradients_impl @@ -293,6 +294,7 @@ class ConcatTest(xla_test.XLATestCase): # The purpose of this is to ensure that XLA on GPU will not run out of memory # with too many arguments. 
+ @test_util.disable_mlir_bridge("TODO(b/153895138): Debug.") def testConcatLargeNumberOfTensors(self): if "CPU" in self.device: self.skipTest("This test can time out on CPU, so we will just allow " diff --git a/tensorflow/compiler/tests/gather_nd_op_test.py b/tensorflow/compiler/tests/gather_nd_op_test.py index 70377af6bdc..90ac515764b 100644 --- a/tensorflow/compiler/tests/gather_nd_op_test.py +++ b/tensorflow/compiler/tests/gather_nd_op_test.py @@ -38,7 +38,6 @@ class GatherNdTest(xla_test.XLATestCase): feed_dict = {paramsp: params, indicesp: indices} return gather_nd_t.eval(feed_dict=feed_dict) - @test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testSimpleDtype(self): for dtype in self.numeric_types: self.assertAllEqual( @@ -47,6 +46,7 @@ class GatherNdTest(xla_test.XLATestCase): np.array([8, 1, 2, 3, 7, 5], dtype=dtype), np.array([[4], [4], [0]], np.int32))) + @test_util.disable_mlir_bridge("Error handling") def testEmptyIndicesAndParamsOKButJustEmptyParamsFails(self): with self.session(): params = np.ones((3, 3), dtype=np.float32) diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index b89472b8085..81779203955 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -30,7 +30,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import image_ops @@ -979,7 +978,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSFrom6(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1017,7 +1015,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1051,7 +1048,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([[0, 1, 2], [0, 1, 3]], indices_output) self.assertAllEqual([3, 3], num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSSingleFrom6Max3(self): boxes_data = [[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]] @@ -1082,7 +1078,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([0, 1, 2], indices_output) self.assertAllEqual(3, num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSSingleFrom6NoPad(self): boxes_data = [[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]] @@ -1112,7 +1107,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([0, 1, 2, 4, 5], indices_output) self.assertAllEqual(5, num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def 
testBatchedNMSBatchDimsFrom6Max3(self): boxes_data = [[[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1146,7 +1140,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([[[0, 1, 2], [0, 1, 3]]], indices_output) self.assertAllEqual([[3, 3]], num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSScoreThresholdFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1182,7 +1175,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([3, 2], num_valid_output) self.assertAllEqual([[0, 1, 2], [0, 1, invalid_index]], indices_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSUnsortedInputFrom6(self): boxes_data = [[[0, 2, 1, 2], [3, 3, 4, 4], [0, 0, 1, 1], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8]], @@ -1219,7 +1211,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSNoncanonicalizedInputFrom6(self): boxes_data = [[[1, 0, 0, 1], [4, 3, 3, 4], [1, 0.4, 0, 1.4], [1, 0.6, 0, 1.6], [1, 0.8, 0, 1.8], [1, 2, 0, 2]], @@ -1257,7 +1248,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSScoreThresholdCanInputsFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1293,7 +1283,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([3, 2], num_valid_output) self.assertAllEqual([[0, 1, 2], [0, 1, invalid_index]], indices_output) - @test_util.with_forward_compatibility_horizons(None, [2020, 4, 21]) def testBatchedNMSFrom6DynamicInput(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], diff --git a/tensorflow/compiler/tests/ternary_ops_test.py b/tensorflow/compiler/tests/ternary_ops_test.py index 465f368db82..a1bb64eb88d 100644 --- a/tensorflow/compiler/tests/ternary_ops_test.py +++ b/tensorflow/compiler/tests/ternary_ops_test.py @@ -24,6 +24,7 @@ import scipy.special as sps from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops @@ -47,6 +48,8 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): {'start': 1, 'end': 2, 'num': 1}, {'start': 1, 'end': 4, 'num': 3}, {'start': 0, 'end': 41, 'num': 42}) + @test_util.disable_mlir_bridge( + 'TODO(b/156174708): Dynamic result types not supported') def testLinspace(self, start, end, num): expected = np.linspace(start, end, num, dtype=np.float32) result = self._testTernary( @@ -74,6 +77,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): np.int32(2), expected=np.array([1, 3, 5], dtype=np.int32)) + @test_util.disable_mlir_bridge('TODO(b/155949336)') def testSelect(self): for dtype in self.numeric_types: self._testTernary( @@ -211,6 +215,7 @@ class TernaryOpsTest(xla_test.XLATestCase, 
parameterized.TestCase): upper, expected=np.minimum(np.maximum(x, lower), upper)) + @test_util.disable_mlir_bridge('Enable tf.Betainc Compilation') def testBetaincSanity(self): # This operation is only supported for float32 and float64. for dtype in self.numeric_types & {np.float32, np.float64}: @@ -230,7 +235,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): { 'sigma': 1e15, 'rtol': 1e-6, - 'atol': 1e-6 + 'atol': 1e-4 }, { 'sigma': 30, @@ -240,7 +245,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): { 'sigma': 1e-8, 'rtol': 5e-4, - 'atol': 3e-6 + 'atol': 3e-4 }, { 'sigma': 1e-16, @@ -248,6 +253,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase): 'atol': 2e-4 }, ) + @test_util.disable_mlir_bridge('Enable tf.Betainc Compilation') def testBetainc(self, sigma, rtol, atol): # This operation is only supported for float32 and float64. for dtype in self.numeric_types & {np.float32, np.float64}: diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index cd9ba983785..3e36f67615b 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -186,8 +186,6 @@ class UnaryOpsTest(xla_test.XLATestCase): self._assertOpOutputMatchesExpected( math_ops.cos, x, expected=np.cos(x), rtol=tol, atol=1e-5) - @test_util.disable_mlir_bridge( - "TODO(b/153812660): Handle tf.Softmax compilation") def testFloatOps(self): for dtype in self.float_types: x = np.arange(-0.90, 0.90, 0.25) @@ -514,6 +512,11 @@ class UnaryOpsTest(xla_test.XLATestCase): ], dtype=dtype)) + @test_util.disable_mlir_bridge( + "TODO(b/153812660): Handle tf.QuantizeAndDequantize compilation") + def testQuantizeAndDequantize(self): + for dtype in self.float_types: + def quantize_and_dequantize_v2(x): return array_ops.quantize_and_dequantize_v2( x, -127, 127, signed_input=True, num_bits=8) @@ -598,8 +601,7 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([-1, -0.5, 0, 0.3], dtype=dtype), expected=np.array([-1., -0.5, 0., 0.296875], dtype=dtype)) - @test_util.disable_mlir_bridge( - "Complex types not supported in CreateDenseElementsAttrFromLiteral") + @test_util.disable_mlir_bridge("TODO(b/156135423): Fix ConvertSigmoidOp") def testComplexOps(self): for dtype in self.complex_types: @@ -757,7 +759,6 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype), expected=np.array([1, -4, 2.7, 0], dtype=ctypes[dtype])) - @test_util.disable_mlir_bridge("TODO(b/153896312): Handle unsigned ints") def testIntOps(self): for dtype in self.int_types: self._assertOpOutputMatchesExpected( diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py index df388c655d0..f3e915daa67 100644 --- a/tensorflow/compiler/tests/xla_ops_test.py +++ b/tensorflow/compiler/tests/xla_ops_test.py @@ -51,7 +51,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): equality_fn = self.assertAllClose equality_fn(result, expected, rtol=1e-3) - @test_util.disable_mlir_bridge('Not supported yet') def testAdd(self): for dtype in self.numeric_types: self._assertOpOutputMatchesExpected( @@ -72,7 +71,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): np.array([7, 11], dtype=dtype)), expected=np.array([[8, 13], [10, 15]], dtype=dtype)) - @test_util.disable_mlir_bridge('Not supported yet') def testBroadcast(self): for dtype in self.numeric_types: v = np.arange(4, dtype=np.int32).astype(dtype).reshape([2, 
2]) @@ -81,7 +79,7 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(v,), expected=np.tile(v, (7, 42, 1, 1))) - @test_util.disable_mlir_bridge('Unsigned ints are not supported yet') + @test_util.disable_mlir_bridge('Dynamic result types not supported') def testShiftRightLogical(self): self._assertOpOutputMatchesExpected( xla.shift_right_logical, @@ -93,7 +91,7 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(np.array([0xFFFFFFFF, 16], dtype=np.uint32), np.uint32(4)), expected=np.array([0x0FFFFFFF, 1], dtype=np.uint32)) - @test_util.disable_mlir_bridge('Unsigned ints are not supported yet') + @test_util.disable_mlir_bridge('Dynamic result types not supported') def testShiftRightArithmetic(self): self._assertOpOutputMatchesExpected( xla.shift_right_arithmetic, @@ -110,7 +108,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): xla_data_pb2.PrecisionConfig.HIGHEST) @parameterized.parameters(*PRECISION_VALUES) - @test_util.disable_mlir_bridge('Not supported yet') def testConv(self, precision): for dtype in set(self.float_types).intersection( set([dtypes.bfloat16.as_numpy_dtype, np.float32])): @@ -195,7 +192,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(np.array([1, 2, 3], dtype=dtype),), expected=np.array([-1, -2, -3], dtype=dtype)) - @test_util.disable_mlir_bridge('Not supported yet') def testPad(self): for dtype in self.numeric_types: @@ -320,6 +316,7 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): [[673, 674], [683, 684], [693, 694]]]), dtype=dtype)) + @test_util.disable_mlir_bridge('Error handling') def testDynamicSliceWithIncorrectStartIndicesShape(self): with self.session() as session: with self.test_scope(): @@ -333,6 +330,7 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): (r'start_indices must be a vector with length equal to input rank, ' r'but input rank is 3 and start_indices has shape \[2\].*')) + @test_util.disable_mlir_bridge('Error handling') def testDynamicSliceWithIncorrectSizeIndicesShape(self): with self.session() as session: with self.test_scope(): diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD index 8ca30479330..356798c19bd 100644 --- a/tensorflow/compiler/tf2tensorrt/BUILD +++ b/tensorflow/compiler/tf2tensorrt/BUILD @@ -496,6 +496,7 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:optional", "@com_google_protobuf//:protobuf_headers", ], ) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index a90ac172c32..a43b16e9e6a 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -1456,12 +1456,13 @@ Status Converter::TransposeTensor(nvinfer1::ITensor* input_tensor, absl::string_view name, nvinfer1::ITensor** output_tensor) { const auto dims = input_tensor->getDimensions(); - - if (order_with_batch_dim.size() - 1 != size_t(dims.nbDims)) { + const int order_size = use_implicit_batch_ ? 
order_with_batch_dim.size() - 1 + : order_with_batch_dim.size(); + if (order_size != size_t(dims.nbDims)) { return errors::InvalidArgument( "Rank of perm for transpose does not match with that of the input."); } - if (order_with_batch_dim[0] != 0) { + if (use_implicit_batch_ && order_with_batch_dim[0] != 0) { return errors::Unimplemented( "Transpose at batch dimension is not supported."); } @@ -1472,8 +1473,13 @@ Status Converter::TransposeTensor(nvinfer1::ITensor* input_tensor, MarkQuantizationRangesAsInferrable(input_tensor, layer->getOutput(0)); nvinfer1::Permutation permutation; - for (int32_t i = 0; i < dims.nbDims; ++i) { - permutation.order[i] = order_with_batch_dim[i + 1] - 1; + if (use_implicit_batch_) { + for (int32_t i = 0; i < dims.nbDims; ++i) { + permutation.order[i] = order_with_batch_dim[i + 1] - 1; + } + } else { + std::copy(order_with_batch_dim.begin(), order_with_batch_dim.end(), + permutation.order); } VLOG(1) << "TransposeTensor permutation: " << DebugString(permutation, dims.nbDims); @@ -2271,11 +2277,13 @@ Status ConvertTranspose(OpConverterParams* params) { // Verify the permutation. nvinfer1::ITensor* input_tensor = inputs.at(0).tensor(); - if (perm.size() - 1 != size_t(input_tensor->getDimensions().nbDims)) { + const int perm_size = + params->use_implicit_batch ? perm.size() - 1 : perm.size(); + if (perm_size != size_t(input_tensor->getDimensions().nbDims)) { return errors::InvalidArgument( "Rank of perm for transpose does not match with that of the input."); } - if (perm[0] != 0) { + if (params->use_implicit_batch && perm[0] != 0) { return errors::Unimplemented( "Transpose at batch dimension is not supported."); } @@ -2405,26 +2413,19 @@ Status ConvertExpandDims(OpConverterParams* params) { } Status Converter::SqueezeTensor(nvinfer1::ITensor* input, - const std::vector& trt_axes, + std::vector* input_dims, nvinfer1::ITensor** output) { - const nvinfer1::Dims dims = input->getDimensions(); - std::vector input_dims(dims.d, dims.d + dims.nbDims); - // Mark axes to remove by setting them to 0. - for (int axis : trt_axes) { - input_dims[axis] = 0; - } - #if IS_TRT_VERSION_GE(6, 0, 0, 0) // If the remaining dimensions of a squeeze operation have dynamic sizes, we // need to use TRT ops to build the result shape for the squeeze operation. // This is because IShuffleLayer::setReshapeDimensions treats -1 as a special // value. - if (absl::c_any_of(input_dims, [](int i) { return i == -1; })) { + if (absl::c_any_of(*input_dims, [](int i) { return i == -1; })) { nvinfer1::ITensor* shape = network()->addShape(*input)->getOutput(0); std::vector concat_inputs; - for (int i = 0; i < input_dims.size(); i++) { + for (int i = 0; i < input_dims->size(); i++) { // If input dim wasn't set to 0 earlier, we include it in new shape. - if (input_dims[i] != 0) { + if (input_dims->at(i) != 0) { concat_inputs.push_back( network() ->addSlice(*shape, {1, {i}}, {1, {1}}, {1, {1}}) @@ -2444,11 +2445,12 @@ Status Converter::SqueezeTensor(nvinfer1::ITensor* input, } #endif // Remove all dims which are equal to 0. - input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0), - input_dims.end()); + input_dims->erase(std::remove(input_dims->begin(), input_dims->end(), 0), + input_dims->end()); // Reshape tensor. 
nvinfer1::Dims new_dims; - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims)); + VLOG(2) << "input_dims" << input_dims; + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(*input_dims, &new_dims)); TF_RETURN_IF_ERROR(PrepareTensorForShape(TRT_TensorOrWeights(input), new_dims, /*validation_only=*/false, output)); return Status::OK(); @@ -2467,31 +2469,48 @@ Status ConvertSqueeze(OpConverterParams* params) { TFAttrs attrs(node_def); auto squeeze_dims = attrs.get>("squeeze_dims"); if (squeeze_dims.empty()) { - return errors::Unimplemented( - "Squeeze is only implemented for explicit dims, at ", node_def.name()); - } - std::vector trt_axes; - trt_axes.reserve(squeeze_dims.size()); - for (int tf_axis : squeeze_dims) { - // If the axis is valid, then convert it to TRT axis, otherwise abort - // conversion. - int trt_axis; - TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(), - params->use_implicit_batch, &trt_axis)); - // Make sure target dimension is size 1 or unknown size (-1) - if (input_dims[trt_axis] != -1 && input_dims[trt_axis] != 1) { - return errors::InvalidArgument( - "Dimension ", tf_axis, " with size ", input_dims[trt_axis], - " cannot be squeezed because it must be size 1, at ", + if (params->use_implicit_batch || !HasStaticShape(dims)) { + return errors::Unimplemented( + "Squeeze is not implemented for empty squeeze_dims, at ", node_def.name()); + } else { + // explicit batch mode with static input shape we squeeze all singleton + // dimensions + for (int& dim : input_dims) { + if (dim == 1) { + // Mark it for removal by setting it to 0 + dim = 0; + } + } + } + } else { + std::vector trt_axes; + trt_axes.reserve(squeeze_dims.size()); + for (int tf_axis : squeeze_dims) { + // If the axis is valid, then convert it to TRT axis, otherwise abort + // conversion. + int trt_axis; + TF_RETURN_IF_ERROR(ConvertAxis(tf_axis, dims.nbDims, node_def.name(), + params->use_implicit_batch, &trt_axis)); + // Make sure target dimension is size 1 or unknown size (-1) + if (input_dims[trt_axis] != -1 && input_dims[trt_axis] != 1) { + return errors::InvalidArgument( + "Dimension ", tf_axis, " with size ", input_dims[trt_axis], + " cannot be squeezed because it must be size 1, at ", + node_def.name()); + } + trt_axes.push_back(trt_axis); + } + // Mark axes to remove by setting them to 0. + for (int axis : trt_axes) { + input_dims[axis] = 0; } - trt_axes.push_back(trt_axis); } if (params->validation_only) return Status::OK(); nvinfer1::ITensor* output_tensor = nullptr; TF_RETURN_IF_ERROR(params->converter->SqueezeTensor( - input_tensor.tensor(), trt_axes, &output_tensor)); + input_tensor.tensor(), &input_dims, &output_tensor)); params->outputs->push_back(TRT_TensorOrWeights(output_tensor)); return Status::OK(); } diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index 8608c8226ee..2092aecd657 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -529,11 +529,9 @@ class Converter { // Helper function to add a squeeze op to the network. // - // The trt_axes argument lists those axes that need to be squeezed. Each axis - // in the list is numbered according to TRT convention (see ConvertAxis for - // details). - Status SqueezeTensor(nvinfer1::ITensor* input, - const std::vector& trt_axes, + // The input_dims argument stores the TRT dimensions of the input tensor, + // where the dimensions to be squeezed are replaced by 0. 
+ Status SqueezeTensor(nvinfer1::ITensor* input, std::vector* input_dims, nvinfer1::ITensor** output); // Creates an IConstantLayer using 'weights' whose dimensions are specified by diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 3e9c5db80d0..884ed7a5771 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h" +#include +#include #include #include #include @@ -24,6 +26,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" @@ -64,8 +67,45 @@ namespace convert { using absl::StrCat; using ::testing::ElementsAre; using ::testing::ElementsAreArray; +using ::testing::FloatNear; +using ::testing::Matcher; using ::testing::NanSensitiveFloatNear; +// TensorRT modes for testing. We define the following three modes: +// 1. Implicit batch mode: The tensors have static (known) input shape and the +// the batch dimension (first dim) is removed from the TRT tensor shape. In +// a loose notation: trt_shape = tf_shape[1:]. This is the standard mode of +// a TensorRT network definition before TensorRT 6. +// 2. Explicit batch mode: static (known) input shape, but the batch dimension +// is part of the trt tensor shape. (trt_shape = tf_shape) +// 3. Dynamic shape mode allows unknown input shapes, and requires explicit +// batch size definition (trt_shape = tf_shape). +// +// Note that the Converter only distinguishes between two modes: +// - use_implicit_batch == true, this corresponds to kImplicitBatch, +// - use_implicit_batch == false which includes both kExplicitBatch and +// kDynamicShape. +// +// For the converter, the distinction between explicit batch or dynamic shape +// mode follows from the input tensors of the network: dynamic shape input +// implies dynamic shape mode, while static shape input tensors imply explicit +// batch mode. We want to test all these modes, therefore we define the +// TrtTestMode with the following three options. +enum class TrtTestMode { + kImplicitBatch = 0, + kExplicitBatch = 1, + kDynamicShape = 2 +}; + +#if IS_TRT_VERSION_GE(6, 0, 0, 0) +constexpr std::array ValidTrtModes = { + TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch, + TrtTestMode::kDynamicShape}; +#else +constexpr std::array ValidTrtModes = { + TrtTestMode::kImplicitBatch}; +#endif + // TODO(laigd): put this into some test utils file. void ExpectStatus(Status status, error::Code code = error::OK, const char* substr = nullptr) { @@ -86,6 +126,17 @@ nvinfer1::Dims GetTestDims(const std::vector& d) { return dims; } +// Prints the vector to the output stream. 
+template +std::ostream& operator<<(std::ostream& os, const std::vector& v) { + if (!v.empty()) { + os << '['; + std::copy(v.begin(), v.end(), std::ostream_iterator(os, ", ")); + os << "\b\b]"; + } + return os; +} + nvinfer1::DataType TfDataTypeToTrt(DataType tf_dtype) { switch (tf_dtype) { case DT_FLOAT: @@ -167,6 +218,21 @@ void ExpectTrtDimsEqualsArray(const std::vector& lhs, << " actual: " << DebugString(rhs); } +Matcher> ArrayFloatNear(const std::vector& values, + float max_abs_error = 1e-5, + bool nan_sensitive = false) { + std::vector> matchers; + matchers.reserve(values.size()); + for (const float& v : values) { + if (nan_sensitive) { + matchers.emplace_back(NanSensitiveFloatNear(v, max_abs_error)); + } else { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + } + return ElementsAreArray(matchers); +} + template void ExpectArrayNear(const std::vector& lhs, absl::Span rhs) { ASSERT_EQ(lhs.size(), rhs.size()); @@ -1217,6 +1283,17 @@ TEST_F(ConvertGraphDefToEngineTest, IdentityGraph) { TF_EXPECT_OK(RunConvertGraphDefToEngine(&s)); } +// Returns a vector of shapes from a vector of input tensors. This can be used +// to create optimization profiles. +Status GetShapeFromDataVec(DataVec input_data, + std::vector* shape_vec) { + shape_vec->reserve(input_data.size()); + std::transform(input_data.begin(), input_data.end(), + std::back_inserter(*shape_vec), + [](InputOutputData x) { return x.tensor.shape(); }); + return Status::OK(); +} + template inline absl::Span GetSpanForData(const InputOutputData& data) { const auto& tensor_map = data.tensor.flat(); @@ -1239,16 +1316,18 @@ class OpConverterTest : public ::testing::Test { return converter_->GetTensorOrWeights(name, output); } - void Reset() { + void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32, + TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) { // Destroy existing TRT objects in a proper order. converter_.reset(nullptr); engine_.reset(nullptr); // Re-create them in proper order. converter_ = - std::move(Converter::Create(precision_mode_to_test_, + std::move(Converter::Create(precision_mode_to_test, /*use_calibration=*/false, &logger_, - /*use_implicit_batch=*/true) + /*use_implicit_batch=*/trt_mode == + TrtTestMode::kImplicitBatch) .ValueOrDie()); // Reset other related artifacts. @@ -1294,9 +1373,7 @@ class OpConverterTest : public ::testing::Test { } } - // TODO(laigd): test fp16 and int8 support for more converters. void BuildAndRun(const DataVec& input_data, DataVec* output_data, - TrtPrecisionMode precision_mode = TrtPrecisionMode::FP32, const int batch_size = 1) { // Mark the output tensor as TRT engine output. std::vector output_info; @@ -1308,13 +1385,21 @@ class OpConverterTest : public ::testing::Test { // Build the TRT engine. 
ASSERT_EQ(nullptr, engine_.get()); + TrtShapeOptimizationProfile profiles; + if (!converter_->use_implicit_batch()) { + // Create a single optimization profile for explicit batch mode + std::vector input_shapes; + TF_ASSERT_OK(GetShapeFromDataVec(input_data, &input_shapes)); + profiles.AddShape(input_shapes); + profiles.InitProfiles(); + } TF_ASSERT_OK( converter_->BuildCudaEngine(&engine_, /*max_batch_size=*/batch_size, /*max_workspace_size_bytes=*/1 << 26, /*allocator=*/nullptr, /*calibrator=*/nullptr, - /*profiles=*/nullptr)); + /*profiles=*/&profiles)); CHECK_NOTNULL(engine_.get()); CheckDataTypeMatches(input_data); CheckDataTypeMatches(*output_data); @@ -1323,6 +1408,9 @@ class OpConverterTest : public ::testing::Test { std::vector buffers(num_bindings); ASSERT_EQ(engine_->getNbBindings(), num_bindings); + // Since we have only 1 optimization profile (which is enabled by default) + // it is fine to create execution context directly, instead of calling + // profiles.CreateExecutionContexts() TrtUniquePtrType execution_context( engine_->createExecutionContext()); @@ -1350,22 +1438,81 @@ class OpConverterTest : public ::testing::Test { return true; } - // Add ITensor for both validation and conversion. - void AddTestTensor( - const string& name, const std::vector& dims, int batch_size = 1, + bool HasStaticShape(std::vector dims) const { + return !absl::c_any_of(dims, [](int i) { return i < 0; }); + } + + // Adds ITensor for both validation and conversion, assuming explicit batch + // dimension is included in dims (ie for an NCHW tensor dims = {N, C, H, W}). + void AddTestTensorWithExplicitBatchDim( + const string& name, const std::vector& dims, nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) { DataType tf_dtype = TrtDataTypeToTf(trt_dtype); ops::Placeholder::Attrs attrs; TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &attrs.shape_)); - attrs.shape_.InsertDim(0, batch_size); + auto input = ops::Placeholder(scope_.WithOpName(name), tf_dtype, attrs); node_inputs_[name] = input.output; // Add a real ITensor for conversion conditionally. - const nvinfer1::Dims trt_dims = GetTestDims(dims); - if (HasStaticShape(trt_dims)) { + const nvinfer1::Dims trt_dims = + TensorShapeToTrtDims(attrs.shape_, converter_->use_implicit_batch()); + if (!converter_->use_implicit_batch() || HasStaticShape(trt_dims)) { + int batch_size = dims[0]; TF_EXPECT_OK( converter_->AddInputTensor(name, trt_dtype, trt_dims, batch_size)); + } + } + + // Adds ITensor for both validation and conversion. The tensor can have + // partial input shape. This function defines static or dynamic shape input + // tensor for the network based on the trt_mode attribute. This is done + // automatically, unless the user overrides it with an explicit + // partial_input_shape_dims argument. + // + // Parameters: + // - dims actual dimensions of the tensor that we will use during the test + // (including explicit batch dim). This is not used if partial_input_shape + // is defined. + // - partial_input_shape dimensions which can incude unknown shapes. This can + // be empty, in that case the partial_input_shape will be set automatically + // depending on the trt_mode argument. (This also includse explicit batch + // dim). + // + // On return skip_test is false if trt_mode is not compatible with the + // partial input shape. 
+ void AddTestTensor( + const string& name, const std::vector& dims, + nvinfer1::DataType trt_dtype, TrtTestMode trt_mode, + const std::vector* partial_input_shape_dims = nullptr) { + std::vector partial_shape; + if (partial_input_shape_dims && !partial_input_shape_dims->empty()) { + partial_shape = *partial_input_shape_dims; + } else { + if (trt_mode == TrtTestMode::kDynamicShape) { + // In dynamic shape mode we set the all dims unknown. + partial_shape = std::vector(dims.size(), -1); + } else { + // Use static (known) input shapes. + partial_shape = dims; + } + } + AddTestTensorWithExplicitBatchDim(name, partial_shape, trt_dtype); + } + + // Adds ITensor for both validation and conversion. The difference compared to + // AddTestTensorWithExplicitBatchDim is in the meaning of the dims parameter. + // To define a tensor with NCHW shape, here we set dims = {C,H,W} and + // batch_size = N. TODO(tfeher) remove this function once all test are updated + // to use the other version of AddTestTensor which has the trt_mode arg. + void AddTestTensor( + const string& name, const std::vector& dims, int batch_size = 1, + nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) { + std::vector dims_with_batch(dims.size() + 1); + dims_with_batch[0] = batch_size; + std::copy(dims.begin(), dims.end(), dims_with_batch.begin() + 1); + AddTestTensorWithExplicitBatchDim(name, dims_with_batch, trt_dtype); + if (HasStaticShape(dims)) { ASSERT_EQ(batch_size, converter_->batch_size_); } } @@ -1405,9 +1552,9 @@ class OpConverterTest : public ::testing::Test { grappler::GraphProperties graph_properties(item); TF_EXPECT_OK(graph_properties.InferStatically(true)); - TrtNodeValidator validator(graph_properties, precision_mode_to_test_, + TrtNodeValidator validator(graph_properties, converter_->precision_mode(), /*use_calibration=*/false, - /*use_implicit_batch=*/true); + converter_->use_implicit_batch()); ExpectStatus(validator.IsTensorRTCandidate(node), expected_code, expected_msg_substr); } @@ -1446,6 +1593,33 @@ class OpConverterTest : public ::testing::Test { } } + // Helper method to run both validation and conversion, and check the output + // shape. + void RunValidationAndConversion(const NodeDef& node_def, const Status& status, + const char* output_name, + const std::vector& exp_out_dims) { + RunValidationAndConversion(node_def, status.code(), + status.error_message().c_str(), true); + if (status.ok()) { + TRT_TensorOrWeights output; + TF_EXPECT_OK(GetTensorOrWeights(output_name, &output)); + ASSERT_TRUE(output.is_tensor()); + if (converter_->use_implicit_batch() && !exp_out_dims.empty()) { + // We only check output shape implicit batch mode. In dynamic shape + // mode we need to wait for the concrate input shapes to be defined + // (by setBindingDimensions before enqueue) before we can check + // whether the output dims are equal. + // + // TODO(tamas) enable this check in explicit_batch_mode + + // Removing batch dim + auto out_dims = + std::vector(exp_out_dims.begin() + 1, exp_out_dims.end()); + ExpectTrtDimsEqualsArray(out_dims, output.tensor()->getDimensions()); + } + } + } + // Expose quantization_ranges_ for tests std::unordered_map& quantization_ranges() { return converter_->quantization_ranges_; @@ -1456,10 +1630,6 @@ class OpConverterTest : public ::testing::Test { } std::unique_ptr converter_; - protected: - // TODO(laigd): parameterize the test and make the precision mode a parameter. 
- TrtPrecisionMode precision_mode_to_test_ = TrtPrecisionMode::FP32; - private: Logger logger_; TrtUniquePtrType engine_; @@ -1473,6 +1643,127 @@ class OpConverterTest : public ::testing::Test { std::unique_ptr allocator_; }; +// General test parameters to be used with ops that take a single input tensor. +struct TestParamBase { + // Concrete input dimensions for the test (including the batch dim) + std::vector input_dims; + + // Dimensions to define an input with PartialTensorShape. This can be used to + // define networks with dynamic input shape. It can be left empty, in that + // case AddTestTensor sets partial shapes that are appropriate to TrtTestMode. + std::vector partial_input_dims; + + // Concrete (static) output dimensions, including batch size as first dim + std::vector expected_output_dims; + + // Parameter vector, has converter specific meaning. + std::vector param; + + // Expected status of conversion (with concrete error message) + Status status; + + // Expected status of BuildAndRun + Status runtime_status; +}; + +std::ostream& operator<<(std::ostream& os, const TestParamBase& p) { + os << "input_dims" << p.input_dims; + if (!p.partial_input_dims.empty()) { + os << ", partial_input_dims" << p.partial_input_dims; + } + if (!p.expected_output_dims.empty()) { + os << ", exp_out_dims" << p.expected_output_dims; + } + if (!p.param.empty()) { + os << ", param" << p.param; + } + os << ", " << p.status; + return os; +} + +// Parameterized version of OpConverterTest. This class will be instantiated +// to test all the TrtTestModes but only in FP32 precision. This means that we +// will use the following combinations of test parameters: +// 1. TrtTestMode: implicit batch, explicit batch, dynamic shape modes +// 2. DataType of the input TF tensors: DT_FLOAT +// 3. TrtPrecisionMode argument for the Converter: FP32 +class ParameterizedOpConverterTest + : public OpConverterTest, + public ::testing::WithParamInterface< + std::tuple> {}; + +// Instantiate parameter combinations to test. For debugging purposes it might +// make sense to run over all possible combinations, but normally a subset of +// them would be sufficient: +// - All valid options to TrtTestMode (implicit, explicit, dynamic shape) +// - DataType: is the TF data type of the input tensors. This usually only +// influences the data type added by Converter::AddInputTensor. We test the +// valid combinations of input data types in AddAndGetInputs, therefore +// for most of the OpConverterTest its is sufficient to test for DT_FLOAT. +// - TrtPrecisionMode: valid options are FP32, FP16 and INT8. This influences +// how TRT handles the precision inside the TRT network, but should not matter +// for the TF -> TRT conversion. Therefore it should be sufficient to test +// for FP32. +INSTANTIATE_TEST_CASE_P( + OpConvTestInstantiation, ParameterizedOpConverterTest, + ::testing::Combine(::testing::ValuesIn(ValidTrtModes), + ::testing::Values(DT_FLOAT), + ::testing::Values(TrtPrecisionMode::FP32))); + +// Builds and runs the converted network. Checks output tensor shape. Tests +// output values using a matcher. +template +void BuildAndRunConvertedNetwork(const string& name, OpConverterTest* test, + const TestParamBase& p, + const std::vector& input_vec, + const Matcher>& matcher) { + if (!p.status.ok()) { + // conversion was not successful, we cannot run the network + return; + } + if (!p.runtime_status.ok()) { + // Runtime error is expected. This can happen if the operation is invalid + // for the actual input shape. 
Usually we catch these errors during + // conversion. If the network was defined with dynamic input shape than we + // have to postpone these steps until runtime. + // + // TODO(tfeher) Instead of early return, modify BuildAndRun to handle + // runtime errors. + return; + } + typedef typename EnumToDataType::Type T; + TensorShape shape; + TF_EXPECT_OK(TensorShapeUtils::MakeShape(p.input_dims, &shape)); + const DataVec input_data{ + {"input", test->AsTensor(CastTestVector(input_vec), shape)}}; + DataVec output_data{{name, test->ConstructTensor(6)}}; + test->BuildAndRun(input_data, &output_data); + // Check the shape of the actual output tensor + TF_EXPECT_OK(TensorShapeUtils::MakeShape(p.expected_output_dims, &shape)); + EXPECT_TRUE(output_data[0].tensor.shape() == shape) + << "Expected shape: " << shape.DebugString() << ", actual shape" + << output_data[0].tensor.shape().DebugString(); + // Cast the output to float and compare to expected output + auto out_span = GetSpanForData(output_data[0]); + std::vector casted_output(out_span.begin(), out_span.end()); + EXPECT_THAT(casted_output, matcher); +} + +void InstantiateBuildAndRun(DataType tf_dtype, const string& name, + OpConverterTest* test, const TestParamBase& p, + const std::vector& input_vec, + const Matcher>& matcher) { + if (tf_dtype == DT_FLOAT) { + BuildAndRunConvertedNetwork(name, test, p, input_vec, matcher); + } else if (tf_dtype == DT_HALF) { + BuildAndRunConvertedNetwork(name, test, p, input_vec, matcher); + } else if (tf_dtype == DT_INT32) { + BuildAndRunConvertedNetwork(name, test, p, input_vec, matcher); + } else { + FAIL() << "Test not supported for " << tf_dtype; + } +} + template void CopyTensorElements(const Tensor& tensor, protobuf::RepeatedField* out) { out->Clear(); @@ -1610,56 +1901,72 @@ TEST_F(OpConverterTest, ConvertConst) { TestConvertConst(this); } -TEST_F(OpConverterTest, ConvertTranspose) { +TEST_P(ParameterizedOpConverterTest, ConvertTranspose) { + const auto& spec = GetParam(); + const TrtTestMode trt_mode = std::get<0>(spec); + // Data type of TF input tensors + const DataType tf_dtype = std::get<1>(spec); + // Precision mode used for TensorRT engine + TrtPrecisionMode converter_precision = std::get<2>(spec); + // Get the NodeDef for Transpose. Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); + auto input = ops::Placeholder(s.WithOpName("input"), tf_dtype); auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32); auto transpose = ops::Transpose(s.WithOpName("my_transpose"), input, weights); const NodeDef& node_def = transpose.operation.node()->def(); - { - // Permutation is a tensor, should fail. - Reset(); - AddTestTensor("input", {1, 2, 3}); - AddTestTensor("weights", {3}); - RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "The input \"perm\" for Transpose must be a constant, at my_transpose"); + std::vector test_params = { + // For the first test we leave param empty. This signals to use a + // input as weight which will be invalid + TestParamBase{{1, 1, 2, 3}, + {}, + {}, + {}, + Status(error::UNIMPLEMENTED, + "The input \"perm\" for Transpose must be a " + "constant, at my_transpose")}, + TestParamBase{{1, 1, 2, 3}, + {}, + {}, + {0, 1, 2}, + Status(error::INVALID_ARGUMENT, + "Rank of perm for transpose does not match with " + "that of the input.")}, + // Transpose batch dim + TestParamBase{ + {1, 1, 2, 3}, + {}, + {3, 2, 1, 1}, + {3, 2, 1, 0}, + (trt_mode == TrtTestMode::kImplicitBatch) + ? 
Status(error::UNIMPLEMENTED, + "Transpose at batch dimension is not supported") + : Status::OK()}, + TestParamBase{{1, 1, 2, 3}, {}, {1, 3, 1, 2}, {0, 3, 1, 2}}, + }; + if (trt_mode == TrtTestMode::kDynamicShape) { + // Dynamic shape tests where some shapes are known + test_params.push_back(TestParamBase{ + {1, 1, 2, 3}, {-1, 1, 2, -1}, {1, 3, 1, 2}, {0, 3, 1, 2}}); } - { - // Transpose at batch dimension, should fail. - Reset(); - AddTestTensor("input", {1, 2, 3}); - AddTestWeights("weights", {4}, {1, 0, 2, 3}); - RunValidationAndConversion(node_def, error::UNIMPLEMENTED, - "Transpose at batch dimension is not supported"); - } - { - // Permutation rank doesn't match, should fail. - Reset(); - AddTestTensor("input", {1, 2, 3}); - AddTestWeights("weights", {3}, {0, 1, 2}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "Rank of perm for transpose does not match with that of the input."); - } - { - // Ok. - Reset(); - AddTestTensor("input", {1, 2, 3}); - AddTestWeights("weights", {4}, {0, 3, 1, 2}); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - TF_EXPECT_OK(GetTensorOrWeights("my_transpose", &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray({3, 1, 2}, output.tensor()->getDimensions()); - - const DataVec input_data{{"input", AsTensor({1, 2, 3, 4, 5, 6})}}; - DataVec output_data{{"my_transpose", ConstructTensor(6)}}; - BuildAndRun(input_data, &output_data); - EXPECT_THAT(GetSpanForData(output_data[0]), - ElementsAre(1, 4, 2, 5, 3, 6)); + std::vector expected_values{1, 4, 2, 5, 3, 6}; + for (auto p : test_params) { + SCOPED_TRACE(p); + Reset(converter_precision, trt_mode); + AddTestTensor("input", p.input_dims, TfDataTypeToTrt(tf_dtype), trt_mode, + &p.partial_input_dims); + if (p.param.empty()) { + AddTestTensor("weights", {3}); + } else { + AddTestWeights("weights", {static_cast(p.param.size())}, + p.param); + } + RunValidationAndConversion(node_def, p.status, "my_transpose", + p.expected_output_dims); + InstantiateBuildAndRun(tf_dtype, "my_transpose", this, p, + {1, 2, 3, 4, 5, 6}, + ElementsAreArray(expected_values)); } } @@ -1756,7 +2063,7 @@ TEST_F(OpConverterTest, ConvertReshape) { const DataVec input_data{{"input", AsTensor(input_vec)}}; DataVec output_data{ {"my_reshape", ConstructTensor(input_vec.size())}}; - BuildAndRun(input_data, &output_data, TrtPrecisionMode::FP32, batch_size); + BuildAndRun(input_data, &output_data, batch_size); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(input_vec)); } @@ -1908,28 +2215,24 @@ TEST_F(OpConverterTest, ConvertMatMul) { } { // Make sure that INT8 mode uses IFullyConnectedLayer when possible. - precision_mode_to_test_ = TrtPrecisionMode::INT8; - Reset(); + Reset(TrtPrecisionMode::INT8); NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false); AddTestTensor("input", {2, 1, 1}); AddTestWeights("weights", {2, 2}, {0, 1, 2, 3}); RunValidationAndConversion(node_def); CheckAddedLayers(this, false); CheckAddedLayers(this, true); - precision_mode_to_test_ = TrtPrecisionMode::FP32; } { // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not // compatible. In this case we can't use FC because weights is a tensor. 
- precision_mode_to_test_ = TrtPrecisionMode::INT8; - Reset(); + Reset(TrtPrecisionMode::INT8); NodeDef node_def = get_matmul_nodedef(DT_FLOAT, false, false); AddTestTensor("input", {2, 1, 1}); AddTestTensor("weights", {2, 2}); RunValidationAndConversion(node_def); CheckAddedLayers(this, true); CheckAddedLayers(this, false); - precision_mode_to_test_ = TrtPrecisionMode::FP32; } TestMatMulHelper(this, get_matmul_nodedef, "MatMul"); } @@ -1961,15 +2264,13 @@ TEST_F(OpConverterTest, ConvertBatchMatMul) { { // Make sure that INT8 mode doesn't try to use IFullyConnectedLayer when not // compatible. In this case we can't use FC because transpose_a is true. - precision_mode_to_test_ = TrtPrecisionMode::INT8; - Reset(); + Reset(TrtPrecisionMode::INT8); NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, true, false); AddTestTensor("input", {1, 2, 2}); AddTestWeights("weights", {2, 2}, {0, 1, 2, 3}); RunValidationAndConversion(node_def); CheckAddedLayers(this, true); CheckAddedLayers(this, false); - precision_mode_to_test_ = TrtPrecisionMode::FP32; } for (bool transpose_a : {false, true}) { @@ -2144,10 +2445,7 @@ void TestBinaryOp(OpConverterTest* test, bool operand_1_is_tensor, ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions()); // After broadcasting first input becomes {3, 6, 3, 6} and second input // becomes {2, 3, 2, 3}. - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32, - /*batch_size=*/2); + test->BuildAndRun(input_data, &output_data, /*batch_size=*/2); if (node_def.op() == "Add") { EXPECT_THAT( GetSpanForData(output_data[0]), @@ -2281,10 +2579,7 @@ void TestAddN(OpConverterTest* test) { ExpectTrtDimsEqualsArray({1, 2}, output.tensor()->getDimensions()); DataVec output_data{{"my_addn", test->ConstructTensor(4)}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32, - /*batch_size=*/2); + test->BuildAndRun(input_data, &output_data, /*batch_size=*/2); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(CastTestVector({3, 6, 9, 12}))); } @@ -2308,9 +2603,7 @@ void TestAddN(OpConverterTest* test) { ExpectTrtDimsEqualsArray({1, 2}, output.tensor()->getDimensions()); DataVec output_data{{"my_addn", test->ConstructTensor(2)}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(CastTestVector({5, 8}))); } @@ -2332,10 +2625,9 @@ TEST_F(OpConverterTest, ConvertAddN) { } TEST_F(OpConverterTest, ConvertQuantize) { - precision_mode_to_test_ = TrtPrecisionMode::INT8; { // FakeQuantWithMinMaxArgs attributes are empty, should fail. - Reset(); + Reset(TrtPrecisionMode::INT8); NodeDef node_def = MakeNodeDef("my_quantize", "FakeQuantWithMinMaxArgs", {"input"}); AddTestTensor("input", {1, 2, 3}); @@ -2346,7 +2638,7 @@ TEST_F(OpConverterTest, ConvertQuantize) { } { // FakeQuantWithMinMaxArgs ranges set via attributes, ok. - Reset(); + Reset(TrtPrecisionMode::INT8); Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f); @@ -2364,7 +2656,7 @@ TEST_F(OpConverterTest, ConvertQuantize) { } { // FakeQuantWithMinMaxVars ranges set via inputs, ok. 
- Reset(); + Reset(TrtPrecisionMode::INT8); Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT); @@ -2385,7 +2677,7 @@ TEST_F(OpConverterTest, ConvertQuantize) { } { // QuantizeAndDequantizeV2 ranges set via inputs, ok. - Reset(); + Reset(TrtPrecisionMode::INT8); Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT); @@ -2406,7 +2698,7 @@ TEST_F(OpConverterTest, ConvertQuantize) { } { // QuantizeAndDequantizeV2 Range inputs are tensors, should fail. - Reset(); + Reset(TrtPrecisionMode::INT8); Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT); @@ -2424,7 +2716,7 @@ TEST_F(OpConverterTest, ConvertQuantize) { } { // QuantizeAndDequantizeV3 ranges set via inputs, ok. - Reset(); + Reset(TrtPrecisionMode::INT8); Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT); @@ -2477,9 +2769,7 @@ void TestConvertSquare(OpConverterTest* test) { // Engine outputs are converted to FP16 automatically if we set FP16 mode in // the builder. DataVec output_data{{"my_square", test->ConstructTensor(num_inputs)}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); ExpectArrayNear(expected_outputs, GetSpanForData(output_data[0])); } @@ -2828,124 +3118,117 @@ TEST_F(OpConverterTest, ConvertExpandDims) { } } -TEST_F(OpConverterTest, ConvertSqueeze) { - { - // No attrs, should fail. - Reset(); - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input); - const NodeDef& node_def = squeeze.operation.node()->def(); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "Squeeze is only implemented for explicit dims, at my_squeeze"); - } +TEST_P(ParameterizedOpConverterTest, ConvertSqueeze) { + const auto& spec = GetParam(); + const TrtTestMode trt_mode = std::get<0>(spec); + const bool use_implicit_batch = (trt_mode == TrtTestMode::kImplicitBatch); + // Data type of TF input tensors + const DataType tf_dtype = std::get<1>(spec); + // Precision mode used for TensorRT engine + TrtPrecisionMode converter_precision = std::get<2>(spec); // Get the NodeDef for Squeeze. 
- auto get_squeeze_nodedef = [](std::vector axis) -> NodeDef { + auto get_squeeze_nodedef = [tf_dtype](std::vector axes) -> NodeDef { Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - ops::Squeeze::Attrs squeeze_attrs; - squeeze_attrs.axis_ = gtl::ArraySlice(axis); // non-absl ok - auto squeeze = - ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs); - return squeeze.operation.node()->def(); + auto input = ops::Placeholder(s.WithOpName("input"), tf_dtype); + if (!axes.empty()) { + ops::Squeeze::Attrs squeeze_attrs; + squeeze_attrs.axis_ = gtl::ArraySlice(axes); // non-absl ok + auto squeeze = + ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs); + return squeeze.operation.node()->def(); + } else { + auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input); + return squeeze.operation.node()->def(); + } }; - - { - // Input is weights, should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({0}); - AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6}); - RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "The input \"input\" for Squeeze must be a tensor, at my_squeeze"); - } - { - // Squeeze batch dim, should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({0}); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion(node_def, error::UNIMPLEMENTED, - "TensorRT does not allow manipulation of the " - "batch dimension, at my_squeeze"); - } - { - // Squeeze batch dim via negative axis, should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({-4}); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion(node_def, error::UNIMPLEMENTED, - "TensorRT does not allow manipulation of the " - "batch dimension, at my_squeeze"); - } - { - // Squeeze >= rank(input), should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({4}); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "Axis value of 4 is out of bounds, must be in range [-4, 4), at " - "my_squeeze"); - } - { - // Squeeze < -rank(input), should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({-5}); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "Axis value of -5 is out of bounds, must be in range [-4, 4), at " - "my_squeeze"); - } - { - // Squeeze an axis with size != 1, should fail. - Reset(); - NodeDef node_def = get_squeeze_nodedef({2}); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "Dimension 2 with size 2 cannot be squeezed because it must be size 1, " - "at my_squeeze"); - } - - struct TestParams { - std::vector input_dims; - std::vector axis; - std::vector expected_output_dims; + std::vector test_params = { + TestParamBase{ + {1, 2, 1, 3}, // input dims + {}, // input partial dims + {2, 3}, // expected output dims + {}, // axis + trt_mode == TrtTestMode::kExplicitBatch + ? Status::OK() + : Status{error::UNIMPLEMENTED, + "Squeeze is not implemented for empty squeeze_dims, at " + "my_squeeze"}}, + TestParamBase{{1, 2, 1, 3}, + {}, + {2, 1, 3}, + {0}, + use_implicit_batch + ? Status{error::UNIMPLEMENTED, + "TensorRT does not allow manipulation of the " + "batch dimension, at my_squeeze"} + : Status::OK()}, + TestParamBase{{1, 2, 1, 3}, + {}, + {2, 1, 3}, + {-4}, + use_implicit_batch + ? 
Status{error::UNIMPLEMENTED, + "TensorRT does not allow manipulation of the " + "batch dimension, at my_squeeze"} + : Status::OK()}, + TestParamBase{ + {1, 1, 2, 3}, + {}, + {}, + {4}, + Status{error::INVALID_ARGUMENT, + "Axis value of 4 is out of bounds, must be in range [-4, 4), " + "at my_squeeze"}}, + TestParamBase{ + {1, 1, 2, 3}, + {}, + {}, + {-5}, + Status{error::INVALID_ARGUMENT, + "Axis value of -5 is out of bounds, must be in range [-4, 4), " + "at my_squeeze"}}, + TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {1}}, + TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {-3}}, + TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {3}}, + TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {-1}}, + TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, 3, 5}}, + TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {3, 1, 5}}, + TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {-1, -3, -5}}, + TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, -3, 5}}, + TestParamBase{{1, 1, 6}, {}, {1, 6}, {1}}, + TestParamBase{{1, 6, 1}, {}, {1, 6}, {2}}, }; + auto squeeze_non_singleton = TestParamBase{ + {1, 1, 2, 3}, + {}, + {}, + {2}, + Status{error::INVALID_ARGUMENT, + "Dimension 2 with size 2 cannot be squeezed because it must be " + "size 1, at my_squeeze"}}; - // Ok. - std::vector ok_params = { - TestParams{{1, 2, 3}, {1}, {2, 3}}, - TestParams{{1, 2, 3}, {-3}, {2, 3}}, - TestParams{{2, 3, 1}, {3}, {2, 3}}, - TestParams{{2, 3, 1}, {-1}, {2, 3}}, - TestParams{{1, 2, 1, 3, 1}, {1, 3, 5}, {2, 3}}, - TestParams{{1, 2, 1, 3, 1}, {3, 1, 5}, {2, 3}}, - TestParams{{1, 2, 1, 3, 1}, {-1, -3, -5}, {2, 3}}, - TestParams{{1, 2, 1, 3, 1}, {1, -3, 5}, {2, 3}}, - TestParams{{1, 6}, {1}, {6}}, - TestParams{{6, 1}, {2}, {6}}, - }; - for (int i = 0; i < ok_params.size(); ++i) { - Reset(); - NodeDef node_def = get_squeeze_nodedef(ok_params[i].axis); - AddTestTensor("input", ok_params[i].input_dims); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, - output.tensor()->getDimensions()); + if (trt_mode == TrtTestMode::kDynamicShape) { + // In this test we try to squeeze axis=2 which has size > 1. In dynamic + // shape mode the converter sees only -1, so it cannot catch this error. 
+ squeeze_non_singleton.status = Status::OK(); // conversion status + squeeze_non_singleton.runtime_status = + errors::InvalidArgument("Negative number of dimensions -1"); + // Dynamic shape tests with partially known input shape + test_params.push_back(TestParamBase{{2, 1, 3}, {2, -1, 3}, {2, 3}, {1}}); + test_params.push_back(TestParamBase{{2, 1, 3}, {2, 1, -1}, {2, 3}, {1}}); + } + test_params.push_back(squeeze_non_singleton); - const DataVec input_data{{"input", AsTensor({1, 2, 3, 4, 5, 6})}}; - DataVec output_data{{"my_squeeze", ConstructTensor(6)}}; - BuildAndRun(input_data, &output_data); - EXPECT_THAT(GetSpanForData(output_data[0]), - ElementsAre(1, 2, 3, 4, 5, 6)); + for (TestParamBase p : test_params) { + SCOPED_TRACE(p); + Reset(converter_precision, trt_mode); + NodeDef node_def = get_squeeze_nodedef(p.param); + AddTestTensor("input", p.input_dims, TfDataTypeToTrt(tf_dtype), trt_mode, + &p.partial_input_dims); + RunValidationAndConversion(node_def, p.status, "my_squeeze", + p.expected_output_dims); + InstantiateBuildAndRun(tf_dtype, "my_squeeze", this, p, {1, 2, 3, 4, 5, 6}, + ElementsAreArray({1, 2, 3, 4, 5, 6})); } } @@ -4776,10 +5059,8 @@ void TestConvertGather(OpConverterTest* test) { } DataVec output_data{ {"my_gather", test->ConstructTensor(expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32, - /*batch_size=*/expected_output_shape[0]); + test->BuildAndRun(input_data, &output_data, + /*batch_size=*/expected_output_shape[0]); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(converted_expected_output)); } @@ -4850,135 +5131,54 @@ TEST_F(OpConverterTest, ConvertGather) { TestConvertGather(this); } -TEST_F(OpConverterTest, ConvertUnary) { +template +NodeDef CreateUnaryOp() { + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); + return T(s.WithOpName("my_unary"), input).operation.node()->def(); +} + +TEST_P(ParameterizedOpConverterTest, ConvertUnary) { + const auto& spec = GetParam(); + const TrtTestMode trt_mode = std::get<0>(spec); + const DataType tf_dtype = std::get<1>(spec); + TrtPrecisionMode converter_precision = std::get<2>(spec); { // Input is weights, should fail. - Reset(); - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - auto neg = ops::Neg(s.WithOpName("my_unary"), input); - const NodeDef& node_def = neg.operation.node()->def(); + Reset(converter_precision, trt_mode); + const NodeDef node_def = CreateUnaryOp(); AddTestWeights("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2}); RunValidationAndConversion( node_def, error::UNIMPLEMENTED, "The input \"x\" for Neg must be a tensor, at my_unary"); } - - // Get nodedef for unary layer. 
- auto get_unary_nodedef = [](string op_name) -> NodeDef { - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - if (op_name == "Abs") { - auto unary = ops::Abs(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Acos") { - auto unary = ops::Acos(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Acosh") { - auto unary = ops::Acosh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Asin") { - auto unary = ops::Asin(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Asinh") { - auto unary = ops::Asinh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Atan") { - auto unary = ops::Atan(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Atanh") { - auto unary = ops::Atanh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Ceil") { - auto unary = ops::Ceil(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Cos") { - auto unary = ops::Cos(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Cosh") { - auto unary = ops::Cosh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Exp") { - auto unary = ops::Exp(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Floor") { - auto unary = ops::Floor(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Log") { - auto unary = ops::Log(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Neg") { - auto unary = ops::Neg(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Reciprocal") { - auto unary = ops::Reciprocal(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Rsqrt") { - auto unary = ops::Rsqrt(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sin") { - auto unary = ops::Sin(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sinh") { - auto unary = ops::Sinh(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Sqrt") { - auto unary = ops::Sqrt(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } else if (op_name == "Tan") { - auto unary = ops::Tan(s.WithOpName("my_unary"), input); - return unary.operation.node()->def(); - } - EXPECT_TRUE(false); - return NodeDef(); - }; - // Get expected output for unary layer. 
- auto get_unary_output = [](string op_name, float input) -> float { - if (op_name == "Abs") { - return std::abs(input); - } else if (op_name == "Acos") { - return std::acos(input); - } else if (op_name == "Acosh") { - return std::acosh(input); - } else if (op_name == "Asin") { - return std::asin(input); - } else if (op_name == "Asinh") { - return std::asinh(input); - } else if (op_name == "Atan") { - return std::atan(input); - } else if (op_name == "Atanh") { - return std::atanh(input); - } else if (op_name == "Ceil") { - return std::ceil(input); - } else if (op_name == "Cos") { - return std::cos(input); - } else if (op_name == "Cosh") { - return std::cosh(input); - } else if (op_name == "Exp") { - return std::exp(input); - } else if (op_name == "Floor") { - return std::floor(input); - } else if (op_name == "Log") { - return std::log(input); - } else if (op_name == "Neg") { - return -input; - } else if (op_name == "Reciprocal") { - return 1.0 / input; - } else if (op_name == "Rsqrt") { - return 1.0 / std::sqrt(input); - } else if (op_name == "Sin") { - return std::sin(input); - } else if (op_name == "Sinh") { - return std::sinh(input); - } else if (op_name == "Sqrt") { - return std::sqrt(input); - } else if (op_name == "Tan") { - return std::tan(input); - } - EXPECT_TRUE(false); - return 0; - }; - + using OpFunc = std::function; + using ValFunc = float (*)(float); + std::map> op_map; +#define ADD_OP(name, op, compute) \ + op_map[name] = \ + std::make_pair(CreateUnaryOp, static_cast(compute)) + ADD_OP("Abs", ops::Abs, std::abs); + ADD_OP("Acos", ops::Acos, std::acos); + ADD_OP("Acosh", ops::Acosh, std::acosh); + ADD_OP("Asin", ops::Asin, std::asin); + ADD_OP("Asinh", ops::Asinh, std::asinh); + ADD_OP("Atan", ops::Atan, std::atan); + ADD_OP("Atanh", ops::Atanh, std::atanh); + ADD_OP("Ceil", ops::Ceil, std::ceil); + ADD_OP("Cos", ops::Cos, std::cos); + ADD_OP("Cosh", ops::Cosh, std::cosh); + ADD_OP("Exp", ops::Exp, std::exp); + ADD_OP("Floor", ops::Floor, std::floor); + ADD_OP("Log", ops::Log, std::log); + ADD_OP("Neg", ops::Neg, [](float x) { return -x; }); + ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; }); + ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); }); + ADD_OP("Sin", ops::Sin, std::sin); + ADD_OP("Sinh", ops::Sinh, std::sinh); + ADD_OP("Sqrt", ops::Sqrt, std::sqrt); + ADD_OP("Tan", ops::Tan, std::tan); +#undef ADD_OP // Get list of ops to test. std::vector ops_to_test; // Add all ops supported by ConvertUnary. @@ -4989,26 +5189,30 @@ TEST_F(OpConverterTest, ConvertUnary) { } // Add other unary ops to test. ops_to_test.push_back("Rsqrt"); - // Ok. 
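The ADD_OP table above collapses the two removed if/else chains into a single map from op name to a node builder plus a reference function. Below is a self-contained sketch of the same table-driven idea using only the standard library; the macro and map layout are simplified stand-ins for illustration, not the patch's actual helpers.

#include <cmath>
#include <cstdio>
#include <functional>
#include <map>
#include <string>
#include <vector>

int main() {
  using ValFunc = std::function<float(float)>;
  std::map<std::string, ValFunc> op_map;
// Registering an op is a one-liner; adding a new op no longer touches any
// if/else chain.
#define ADD_OP(name, compute) op_map[name] = ValFunc(compute)
  ADD_OP("Abs", [](float x) { return std::abs(x); });
  ADD_OP("Exp", [](float x) { return std::exp(x); });
  ADD_OP("Neg", [](float x) { return -x; });
  ADD_OP("Rsqrt", [](float x) { return 1.0f / std::sqrt(x); });
#undef ADD_OP

  const std::vector<float> inputs = {-0.9f, 0.6f, 2.9f};
  for (const auto& entry : op_map) {
    std::printf("%s:", entry.first.c_str());
    for (float x : inputs) std::printf(" %g", entry.second(x));
    std::printf("\n");
  }
  return 0;
}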
+ // Prepare test parameters + auto p = TestParamBase{ + {1, 1, 2, 3}, // input dims + {}, // input partial dims + {1, 1, 2, 3}, // expected output dims + }; for (const string& op_name : ops_to_test) { - Reset(); - NodeDef node_def = get_unary_nodedef(op_name); - AddTestTensor("input", {1, 2, 3}); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - TF_EXPECT_OK(GetTensorOrWeights("my_unary", &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions()); - - const std::vector input = {-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f}; - const DataVec input_data{{"input", AsTensor(input)}}; - DataVec output_data{{"my_unary", ConstructTensor(6)}}; - BuildAndRun(input_data, &output_data); - for (int i = 0; i < input.size(); ++i) { - const float expected_output = get_unary_output(op_name, input[i]); - EXPECT_THAT(GetSpanForData(output_data[0])[i], - NanSensitiveFloatNear(expected_output, 0.0001)); + SCOPED_TRACE(op_name); + Reset(converter_precision, trt_mode); + if (!op_map.count(op_name)) { + FAIL() << "Unary op test map does not contain op " << op_name; } + NodeDef node_def = op_map[op_name].first(); + + AddTestTensor("input", p.input_dims, TfDataTypeToTrt(tf_dtype), trt_mode); + RunValidationAndConversion(node_def, Status::OK(), "my_unary", + p.expected_output_dims); + + std::vector input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f}; + std::vector output; + std::transform(input_values.begin(), input_values.end(), + std::back_inserter(output), op_map[op_name].second); + InstantiateBuildAndRun(tf_dtype, "my_unary", this, p, input_values, + ArrayFloatNear(output, 0.0001, true)); } } @@ -5112,9 +5316,7 @@ void TestConvertConcat(OpConverterTest* test) { DataVec output_data{ {"my_concat", test->ConstructTensor(ok_params[i].expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(ok_params[i].expected_output)); } @@ -5279,9 +5481,7 @@ void TestConvertSplit(OpConverterTest* test) { // Verify output values are correct. const DataVec input_data{ {"value", test->AsTensor(ok_params[i].value)}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); for (int j = 0; j < outputs.size(); ++j) { EXPECT_THAT(GetSpanForData(output_data[j]), ElementsAreArray(ok_params[i].expected_outputs[j])); @@ -5458,9 +5658,7 @@ void TestConvertUnpack(OpConverterTest* test) { // Verify output values are correct. const DataVec input_data{ {"value", test->AsTensor(ok_params[i].value)}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); for (int j = 0; j < outputs.size(); ++j) { EXPECT_THAT(GetSpanForData(output_data[j]), ElementsAreArray(ok_params[i].expected_outputs[j])); @@ -5629,9 +5827,7 @@ void TestConvertPack(OpConverterTest* test) { } DataVec output_data{{"my_pack", test->ConstructTensor( params[i].expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? 
TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(params[i].expected_output)); } @@ -5779,9 +5975,7 @@ void TestConvertArgMinMax(OpConverterTest* test) { DataVec output_data{ {"my_arg", test->ConstructTensor( params[i].expected_argmax_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); if (node_def.op() == "ArgMax") { EXPECT_THAT(GetSpanForData(output_data[0]), @@ -5880,9 +6074,7 @@ void TestConvertDepthSpaceShuffle( DataVec input_data{{"input", test->AsTensor(params[i].input_value)}}; DataVec output_data{{"my_shuffle", test->ConstructTensor( params[i].expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(params[i].expected_output)); } @@ -6158,9 +6350,7 @@ void TestConvertClipByValue(OpConverterTest* test) { DataVec input_data{{"t", test->AsTensor(params[i].input_value)}}; DataVec output_data{{"my_clip", test->ConstructTensor( params[i].expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(params[i].expected_output)); } @@ -6268,9 +6458,7 @@ void TestConvertSquaredDifference(OpConverterTest* test) { DataVec output_data{ {"my_squared_diff", test->ConstructTensor(params[i].expected_output.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(params[i].expected_output)); } @@ -6375,9 +6563,7 @@ void TestConvertResize(OpConverterTest* test) { {"my_resize", test->ConstructTensor( params[i].expected_nearest_output_values.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); if (node_def.op() == "ResizeBilinear") { ExpectArrayAlmostEqual(params[i].expected_bilinear_output_values, @@ -6477,9 +6663,7 @@ void TestConvertPad(OpConverterTest* test) { {"my_pad", test->ConstructTensor( params[i].expected_output_values.size())}}; - test->BuildAndRun( - input_data, &output_data, - dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32); + test->BuildAndRun(input_data, &output_data); ExpectArrayAlmostEqual(params[i].expected_output_values, GetSpanForData(output_data[0]), CType(1e-5)); } diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc index 5b97a8f1aa2..749335f1b09 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc @@ -371,6 +371,174 @@ string TensorPropertiesToString( }); } +// From the given list of input properties, returns the leading shape, which is +// the shape that determines the batch size of the operation. The leading shape +// is selected from the group of input shapes with the highest rank as follows: +// . 
If all of those shapes have non-negative values for the batch dimension, +// the leading shape is the one with the largest value for the batch +// dimension. +// . If some or all of those shapes have negative values for the batch +// dimension, and the rest of those shapes have 1 for the batch dimension, +// the leading shape is the first of those shapes with a negative value for +// the batch dimension. +// . Otherwise, we can't determine the leading shape for the operation and +// have to exclude the operation from TRT. +// +// Examples: +// case-1: a[1,3,4] + b[2,3,4] => leading shape [2,3,4] +// case-2: a[2,3,4] + b[scalar] => leading shape [2,3,4] +// case-3: a[-1,3,4] + b[1,3,4] => leading shape [-1,3,4] +// case-4: a[-1,3,4] + b[2,3,4] => no leading shape +// +// We have to return "no leading shape" for case-4 to exclude such operation +// from being translated for this reason: +// The actually input for "a" have to be in the shape of [2,3,4] for the +// operation to be valid. On the other hand, if we translate the operation +// to implicit batch mode, it will becomes a[3,4]+b[3,4] which is valid for +// any input shape of "a". +// +// This routine assumes the input program is valid. For example, we shouldn't +// see invalid operation like a[2,3,4] + b[3,3,4]. It also assumes the input +// properties is not empty and all input have known shapes. +// +// TODO(bixia): find a way to share this knowledge with the converter. +// TODO(bixia): investigate the use of symbolic shape analysis to improve +// segmentation, such as by requiring the dynamic dimensions to have the same +// negative value. +absl::optional FindLeadingShape( + absl::Span properties) { + DCHECK(!properties.empty()); + const TensorShapeProto* result; + int max_batch_dim_value; + auto choose_shape_with_higher_rank = [&](const TensorShapeProto* s) { + result = s; + max_batch_dim_value = s->dim_size() < 1 ? 1 : s->dim(0).size(); + }; + + DCHECK(!properties[0].shape().unknown_rank()); + choose_shape_with_higher_rank(&properties[0].shape()); + + for (const OpInfo::TensorProperties& p : properties.subspan(1)) { + DCHECK(!p.shape().unknown_rank()); + if (p.shape().dim_size() < result->dim_size()) continue; + + if (p.shape().dim_size() > result->dim_size()) { + choose_shape_with_higher_rank(&p.shape()); + continue; + } + + // Among the shapes with the same rank, choose the one with a dynamic batch + // size. If no shapes have a dynamic batch size, choose the one with the + // largest size. + if (result->dim_size() < 1) continue; + + if (p.shape().dim(0).size() < 0 || result->dim(0).size() < 0) { + if (p.shape().dim(0).size() < 0 && result->dim(0).size() >= 0) { + result = &p.shape(); + } else { + max_batch_dim_value = + std::max(max_batch_dim_value, p.shape().dim(0).size()); + } + + continue; + } + + if (p.shape().dim(0).size() > result->dim(0).size()) { + result = &p.shape(); + max_batch_dim_value = result->dim(0).size(); + } + } + + if (result->dim_size() > 0 && result->dim(0).size() < 0) { + // dynamic batch size + if (max_batch_dim_value <= 1) { + return result; + } else { + return absl::nullopt; + } + } + + return result; +} + +// Returns the inputs that are relevant to determinate the batch size of the +// operation. This routine handles the following cases: +// . Operations that support implicit boradcasting, such as operation mul. +// In this case, we need to inspect all the inputs in order to determine the +// batch size of the operation. +// . Special cases. Such as "Conv2DBackpropInput", "Conv3DBackpropInputV2". 
+// . The batch size of an operation is determined by the first input of the
+//   operation.
+absl::Span<const OpInfo::TensorProperties> GetInputsToDeterminateBatchSize(
+    const Node* node, const std::vector<OpInfo::TensorProperties>& all_inputs) {
+  // TODO(bixia): Find a way to share this knowledge with the converter.
+  static std::set<string> broadcast_supporting_ops = {
+      // ops corresponding to ConvertBinary in the converter
+      "Add",
+      "AddV2",
+      "Mul",
+      "Sub",
+      "Div",
+      "FloorDiv",
+      "RealDiv",
+      "Minimum",
+      "Maximum",
+      "Pow",
+      // other ops that need GetTrtBroadcastShape to convert
+      "BiasAdd",
+      "SquaredDifference",
+      "BatchMatMul",
+      "BatchMatMulV2",
+  };
+  const string& op = node->def().op();
+
+  if (op == "Conv2DBackpropInput" || op == "Conv3DBackpropInputV2") {
+    DCHECK_EQ(all_inputs.size(), 3);
+    return absl::MakeSpan(all_inputs).subspan(2, 1);
+  }
+
+  if (broadcast_supporting_ops.count(op)) {
+    return absl::MakeSpan(all_inputs);
+  }
+
+  // This is the common case for the operations that don't support implicit
+  // broadcasting: the first operand determines the batch size. All other
+  // cases are handled before reaching here.
+  return absl::MakeSpan(all_inputs).subspan(0, 1);
+}
+
+// Returns true if we can remove the implicit batch dimension of the operation
+// and translate it for TRT.
+//
+// In particular, if the input shape has dynamic rank or the input shape rank
+// is less than 2, we can't remove the implicit batch dimension and generate
+// a new operation for TRT translation.
+bool OperationCanBeTranslatedToImplicitBatch(
+    const grappler::GraphProperties* graph_properties, const Node* node) {
+  VLOG(3) << "process node " << node->name();
+  if (node->num_inputs() == 0) return true;
+  if (!graph_properties || !graph_properties->HasInputProperties(node->name()))
+    return false;
+
+  VLOG(3) << "input shapes "
+          << TensorPropertiesToString(
+                 graph_properties->GetInputProperties(node->name()));
+
+  const std::vector<OpInfo::TensorProperties>& all_input_properties =
+      graph_properties->GetInputProperties(node->name());
+  absl::Span<const OpInfo::TensorProperties> input_properties =
+      GetInputsToDeterminateBatchSize(node, all_input_properties);
+  if (absl::c_any_of(input_properties, [](const OpInfo::TensorProperties& p) {
+        return p.shape().unknown_rank();
+      })) {
+    return false;
+  }
+
+  absl::optional<const TensorShapeProto*> leading_shape =
+      FindLeadingShape(input_properties);
+  return leading_shape.has_value() && leading_shape.value()->dim_size() >= 2;
+}
+
 // Returns true if we can't be sure that the operand with the given properties
 // won't have negative values for non-batch dimensions.
 //
@@ -467,6 +635,42 @@ void ContractEdge(SimpleEdge* edge, SimpleGraph* graph,
   }
 }
+// Returns a batch size representation for a segment that only contains the
+// given node.
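As a quick cross-check of the selection rule documented for FindLeadingShape, here is a standalone sketch that applies the same rule to plain integer vectors, with -1 standing for a dynamic dimension, and reproduces the four documented cases. It is a simplified model assuming C++17, not the function's actual implementation.

#include <algorithm>
#include <cstdio>
#include <optional>
#include <vector>

using Shape = std::vector<long long>;  // -1 marks a dynamic dimension

// Simplified model of the leading-shape rule: consider only the shapes with
// the highest rank, then reconcile their batch (first) dimensions. Assumes a
// non-empty list of shapes.
std::optional<Shape> FindLeadingShapeModel(const std::vector<Shape>& shapes) {
  size_t max_rank = 0;
  for (const Shape& s : shapes) max_rank = std::max(max_rank, s.size());
  std::vector<Shape> top;
  for (const Shape& s : shapes)
    if (s.size() == max_rank) top.push_back(s);
  if (max_rank == 0) return top.front();  // only scalars

  bool has_dynamic = false;
  long long max_static = 0;
  const Shape* first_dynamic = nullptr;
  for (const Shape& s : top) {
    if (s[0] < 0) {
      has_dynamic = true;
      if (first_dynamic == nullptr) first_dynamic = &s;
    } else {
      max_static = std::max(max_static, s[0]);
    }
  }
  if (!has_dynamic) {
    // All batch dimensions are known: the largest one leads.
    for (const Shape& s : top)
      if (s[0] == max_static) return s;
  }
  // A dynamic batch dimension leads only if every static batch dimension is 1.
  if (max_static <= 1) return *first_dynamic;
  return std::nullopt;  // dynamic mixed with static > 1: give up
}

int main() {
  auto show = [](const char* label, const std::optional<Shape>& s) {
    std::printf("%s:", label);
    if (!s) { std::printf(" no leading shape\n"); return; }
    for (long long d : *s) std::printf(" %lld", d);
    std::printf("\n");
  };
  show("case-1", FindLeadingShapeModel({{1, 3, 4}, {2, 3, 4}}));   // 2 3 4
  show("case-2", FindLeadingShapeModel({{2, 3, 4}, {}}));          // 2 3 4
  show("case-3", FindLeadingShapeModel({{-1, 3, 4}, {1, 3, 4}}));  // -1 3 4
  show("case-4", FindLeadingShapeModel({{-1, 3, 4}, {2, 3, 4}}));  // none
  return 0;
}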
+ClusterBatchSize GetClusterBatchSizeForNode( + const grappler::GraphProperties* graph_properties, const Node* node, + bool use_implicit_batch) { + ClusterBatchSize cluster_batch_size; + if (!use_implicit_batch || !node || node->num_inputs() == 0) { + return cluster_batch_size; + } + + if (!graph_properties || + !graph_properties->HasInputProperties(node->name())) { + VLOG(3) << "doesn't have input property"; + return cluster_batch_size.SetBatchSizeValue(-1); + } + + const std::vector& input_properties = + graph_properties->GetInputProperties(node->name()); + absl::optional optional_leading_shape = + FindLeadingShape(GetInputsToDeterminateBatchSize(node, input_properties)); + DCHECK(optional_leading_shape.has_value()); + const TensorShapeProto* leading_shape = optional_leading_shape.value(); + + DCHECK(!leading_shape->unknown_rank() && leading_shape->dim_size() >= 2); + return cluster_batch_size.SetBatchSizeValue(leading_shape->dim(0).size()); +} + +void AddSegmentForNode(const grappler::GraphProperties* graph_properties, + std::vector>* segments, + SimpleNode* node, bool use_implicit_batch) { + segments->emplace_back( + node, GetClusterBatchSizeForNode( + graph_properties, node == nullptr ? nullptr : node->tf_node(), + use_implicit_batch)); +} + } // namespace Status SegmentGraph(const Graph* tf_graph, @@ -528,6 +732,12 @@ Status SegmentGraph(const Graph* tf_graph, }; if (options.exclude_node_list.count(node->name()) != 0) { exclude_node("excluded by segmenter option"); + } else if (options.use_implicit_batch && + !OperationCanBeTranslatedToImplicitBatch(graph_properties, + node->tf_node())) { + exclude_node( + "implicit batch mode requires input shape with at least two " + "dimensions"); } else if (!options.allow_dynamic_non_batch_dim && OperationHasDynamicNonBatchDimension(graph_properties, node->tf_node())) { @@ -548,7 +758,8 @@ Status SegmentGraph(const Graph* tf_graph, << "(Op name: " << node->name(); } } - node_segments.emplace_back(node); + AddSegmentForNode(graph_properties, &node_segments, node, + options.use_implicit_batch); } string msg = StrCat( "There are ", num_unsupported_ops, " ops of ", unsupported_ops.size(), @@ -581,18 +792,23 @@ Status SegmentGraph(const Graph* tf_graph, return true; }); for (const SimpleNode* node : order) { - // All output nodes of 'node' have been visited... + // All output nodes of 'node' have been visited. VLOG(3) << "Trying node " << node->name() << " id=" << node->id(); - // 'node' must be a TRT candidate... + // 'node' must be a TRT candidate. if (node_segments[node->id()].Value() == nullptr) { VLOG(3) << "... not a TRT candidate"; continue; } - // Contract output edges to combine 'node' with output - // nodes. Iterate since combining two nodes may unblock other - // combining. + // Contract output edges to combine 'node' with output nodes. Repeat this + // step until no output edges can be further contracted. This is because + // contracting an output edge may unblock new edges for contracting. + ClusterBatchSize expected_batch_size = + node_segments[node->id()].BatchSize(); + VLOG(3) << "batch size " << expected_batch_size; while (true) { std::set contract_edges; + // TODO(bixia): consider merging the loop to find the edges and the loop + // to contract the edges. for (const SimpleEdge* out_edge : node->out_edges()) { VLOG(3) << "... out node " << out_edge->dst()->name() << " ( " << out_edge->dst()->id() << " <- " << node->id() << " )"; @@ -600,14 +816,26 @@ Status SegmentGraph(const Graph* tf_graph, VLOG(3) << "... ... 
Control Edge, Skipping"; continue; } - // Out node must be TRT candidate... + // Out node must be a TRT candidate. if (node_segments[out_edge->dst()->id()].Value() == nullptr) { VLOG(3) << "... ... not a TRT candidate"; continue; } + // Out node must have compatible batch size. + ClusterBatchSize out_batch_size = + node_segments[out_edge->dst()->id()].BatchSize(); + ClusterBatchSize merged_batch_size = expected_batch_size; + if (!merged_batch_size.MergeIfCompatible(out_batch_size)) { + VLOG(3) << "... ... incompatible batch size " + << expected_batch_size.ToString() << " " + << out_batch_size.ToString(); + continue; + } if (CanContractEdge(out_edge, graph)) { - VLOG(3) << "... ... can contract"; + VLOG(3) << "... ... can contract. new batch size " + << merged_batch_size.ToString(); contract_edges.insert(out_edge); + expected_batch_size = merged_batch_size; } else { VLOG(3) << "... ... cannot contract, would form cycle"; } @@ -624,7 +852,8 @@ Status SegmentGraph(const Graph* tf_graph, VLOG(3) << "Merge " << src->name() << " <- " << dst->name() << " (" << src->id() << " <- " << dst->id(); - node_segments[src->id()].Merge(&node_segments[dst->id()]); + TF_RETURN_IF_ERROR( + node_segments[src->id()].Merge(&node_segments[dst->id()])); // Contracting the edge leaves disconnected graph edges. // Remove these from the graph and from 'contract_edges' so we @@ -638,6 +867,12 @@ Status SegmentGraph(const Graph* tf_graph, graph->RemoveEdge(r); } } + ClusterBatchSize actual_batch_size = + node_segments[node->id()].BatchSize(); + if (expected_batch_size != actual_batch_size) { + return errors::Internal( + "expected batch size is not the same as the actual batch size"); + } } } diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc b/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc index 68195addb03..2437481a9c4 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment_test.cc @@ -369,6 +369,154 @@ TEST_F(SegmentTest, ExcludeReshapeWithDynamicNonBatchDimensionInOutput) { RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, {}); } +TEST_F(SegmentTest, RankOneCannotUseImplicitBatch) { + Scope s = Scope::NewRootScope(); + auto input_0_shape = ops::Placeholder::Shape(TensorShape({3})); + auto input_1_shape = ops::Placeholder::Shape(TensorShape({3})); + auto input_0 = + ops::Placeholder(s.WithOpName("input-0"), DT_FLOAT, input_0_shape); + auto input_1 = + ops::Placeholder(s.WithOpName("input-1"), DT_FLOAT, input_1_shape); + auto const_val = ops::Const(s.WithOpName("const-scalar"), 1.0f, {}); + auto output_0 = ops::Add(s.WithOpName("output-0"), input_0, const_val); + auto output_1 = ops::Add(s.WithOpName("output-1"), input_1, const_val); + + grappler::GrapplerItem item; + item.fetch.push_back("output-0"); + item.fetch.push_back("output-1"); + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + + grappler::GraphProperties static_graph_properties(item); + TF_EXPECT_OK(static_graph_properties.InferStatically(true)); + + Graph g(OpRegistry::Global()); + TF_CHECK_OK( + ConvertGraphDefToGraph(GraphConstructorOptions(), item.graph, &g)); + + const std::set all_nodes = {"const-scalar", "output-0", "output-1"}; + EnableImplicitBatchModeForStaticEngine(); + RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, {}); +} + +TEST_F(SegmentTest, TwoChainsDiffBatchSizes) { + Scope s = Scope::NewRootScope(); + auto input_0_shape = ops::Placeholder::Shape(TensorShape({2, 3})); + auto input_1_shape = 
ops::Placeholder::Shape(TensorShape({5, 3})); + auto input_0 = + ops::Placeholder(s.WithOpName("input-0"), DT_FLOAT, input_0_shape); + auto input_1 = + ops::Placeholder(s.WithOpName("input-1"), DT_FLOAT, input_1_shape); + auto const_val = ops::Const(s.WithOpName("const-scalar"), 1.0f, {}); + auto output_0 = ops::Add(s.WithOpName("output-0"), input_0, const_val); + auto output_1 = ops::Add(s.WithOpName("output-1"), input_1, const_val); + + grappler::GrapplerItem item; + item.fetch.push_back("output-0"); + item.fetch.push_back("output-1"); + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + + grappler::GraphProperties static_graph_properties(item); + TF_EXPECT_OK(static_graph_properties.InferStatically(true)); + + Graph g(OpRegistry::Global()); + TF_CHECK_OK( + ConvertGraphDefToGraph(GraphConstructorOptions(), item.graph, &g)); + + const std::set all_nodes = {"const-scalar", "output-0", "output-1"}; + EnableImplicitBatchModeForStaticEngine(); + RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, + {{"output-0", "const-scalar"}}); +} + +TEST_F(SegmentTest, SameRankImplicitBroadcastingStaticBatchSize) { + Scope s = Scope::NewRootScope(); + auto input_0_shape = ops::Placeholder::Shape(TensorShape({2, 3, 1})); + auto input_1_shape = ops::Placeholder::Shape(TensorShape({1, 3, 4})); + auto input_2_shape = ops::Placeholder::Shape(TensorShape({2, 3, 4})); + auto input_0 = + ops::Placeholder(s.WithOpName("input-0"), DT_FLOAT, input_0_shape); + auto input_1 = + ops::Placeholder(s.WithOpName("input-1"), DT_FLOAT, input_1_shape); + auto input_2 = + ops::Placeholder(s.WithOpName("input-2"), DT_FLOAT, input_2_shape); + auto multiple = ops::Mul(s.WithOpName("multiple"), input_2, input_2); + auto output_0 = ops::Add(s.WithOpName("output-0"), input_0, multiple); + auto output_1 = ops::Add(s.WithOpName("output-1"), input_1, multiple); + + grappler::GrapplerItem item; + item.fetch.push_back("output-0"); + item.fetch.push_back("output-1"); + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + + grappler::GraphProperties static_graph_properties(item); + TF_EXPECT_OK(static_graph_properties.InferStatically(true)); + + Graph g(OpRegistry::Global()); + TF_CHECK_OK( + ConvertGraphDefToGraph(GraphConstructorOptions(), item.graph, &g)); + + const std::set all_nodes = {"multiple", "output-0", "output-1"}; + EnableImplicitBatchModeForStaticEngine(); + RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, + {all_nodes}); +} + +TEST_F(SegmentTest, SameRankImplicitBroadcastingDynamicBatchSize) { + Scope s = Scope::NewRootScope(); + auto input_0_shape = ops::Placeholder::Shape(PartialTensorShape({-1, 2})); + auto input_1_shape = ops::Placeholder::Shape(TensorShape({1, 2})); + auto input_0 = + ops::Placeholder(s.WithOpName("input-0"), DT_FLOAT, input_0_shape); + auto input_1 = + ops::Placeholder(s.WithOpName("input-1"), DT_FLOAT, input_1_shape); + auto const_val = ops::Const(s.WithOpName("const-val"), 1.0f, {1, 1}); + auto add_0 = ops::Add(s.WithOpName("add-0"), input_0, const_val); + auto output_0 = ops::Add(s.WithOpName("output-0"), input_0, add_0); + + grappler::GrapplerItem item; + item.fetch.push_back("output-0"); + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + + grappler::GraphProperties static_graph_properties(item); + TF_EXPECT_OK(static_graph_properties.InferStatically(true)); + + Graph g(OpRegistry::Global()); + TF_CHECK_OK( + ConvertGraphDefToGraph(GraphConstructorOptions(), item.graph, &g)); + + const std::set all_nodes = {"const-val", "add-0", "output-0"}; + 
EnableImplicitBatchModeForStaticEngine(); + RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, + {{"const-val", "add-0", "output-0"}}); +} + +TEST_F(SegmentTest, IncompatibleBatchSizes) { + Scope s = Scope::NewRootScope(); + auto input_0_shape = ops::Placeholder::Shape(PartialTensorShape({-1, 2})); + auto input_1_shape = ops::Placeholder::Shape(TensorShape({2, 2})); + auto input_0 = + ops::Placeholder(s.WithOpName("input-0"), DT_FLOAT, input_0_shape); + auto input_1 = + ops::Placeholder(s.WithOpName("input-1"), DT_FLOAT, input_1_shape); + auto const_val = ops::Const(s.WithOpName("const-val"), 1.0f, {2, 2}); + auto add_0 = ops::Add(s.WithOpName("add-0"), input_0, const_val); + auto output_0 = ops::Add(s.WithOpName("output-0"), input_0, add_0); + + grappler::GrapplerItem item; + item.fetch.push_back("output-0"); + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + + grappler::GraphProperties static_graph_properties(item); + TF_EXPECT_OK(static_graph_properties.InferStatically(true)); + + Graph g(OpRegistry::Global()); + TF_CHECK_OK( + ConvertGraphDefToGraph(GraphConstructorOptions(), item.graph, &g)); + + const std::set all_nodes = {"const-val", "add-0", "output-0"}; + EnableImplicitBatchModeForStaticEngine(); + RunTest(&g, &static_graph_properties, all_nodes, all_nodes, all_nodes, {}); +} } // namespace test } // namespace segment } // namespace tensorrt diff --git a/tensorflow/compiler/tf2tensorrt/segment/union_find.h b/tensorflow/compiler/tf2tensorrt/segment/union_find.h index 6458ae692fd..70e83c12fca 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/union_find.h +++ b/tensorflow/compiler/tf2tensorrt/segment/union_find.h @@ -16,51 +16,192 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2TENSORRT_SEGMENT_UNION_FIND_H_ #define TENSORFLOW_COMPILER_TF2TENSORRT_SEGMENT_UNION_FIND_H_ +#include "absl/strings/str_format.h" +#include "absl/types/optional.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT + namespace tensorflow { namespace tensorrt { namespace segment { -// Union-Find data structure. -// Each cluster has an associated value; when merging clusters we can control -// which value becomes the representative of the merged clusters. Values must be -// copyable. +// ClusterBatchSize is a data structure to record the batch size we have seen +// for a cluster during segmentation. +// +// When constructing clusters for implicit batch mode, we support the +// with both dynamic batch size and static batch size. We restrict nodes inside +// a cluster to either have dynamic batch size or have the same value for static +// batch size. For this reason, we use a field has_dynamic_batch_value_ to keep +// track of whether the cluster has any node with dynamic batch size. We use +// field static_batch_value_ to keep track of whether the cluster has any node +// with static batch size and what the value of the static batch size, if any. +// Examples: +// cluster: a = a1[1,3] + a1[1,3] +// ClusterBatchSize: has_dynamic_batch_size_ = false +// static_batch_value_ = {has value, 1} +// +// cluster: b = b1[-1,3] + b2[-1, 3] +// ClusterBatchSize: has_dynamic_batch_size_ = true +// static_batch_value_ = {has no value} +// +// cluster: a = a1[1,3] + a1[1,3]; b = b1[-1,3] + b2[-1, 3] +// ClusterBatchSize: has_dynamic_batch_size_ = true +// static_batch_value_ = {has value, 1} +// +// When constructing cluster for explicit batch mode, all ClusterBatchSize is +// irrelevant. 
+// +// +absl::optional static_batch_value_; +class ClusterBatchSize { + public: + ClusterBatchSize() + : has_dynamic_batch_value_(false), static_batch_value_(absl::nullopt) {} + + bool operator==(const ClusterBatchSize& b) { + return HasDynamicBatchValue() == b.HasDynamicBatchValue() && + static_batch_value_ == b.static_batch_value_; + } + + bool operator!=(const ClusterBatchSize& b) { return !(*this == b); } + + int GetStaticBatchValue() const { + DCHECK(HasStaticBatchValue()); + return static_batch_value_.value(); + } + + // Sets the batch size value assuming that the object doesn't have a batch + // size value yet: + // a non-negative input value representing a known batch size. + // a negative input value representing a dynamic batch size. + ClusterBatchSize SetBatchSizeValue(int value) { + if (value < 0) { + has_dynamic_batch_value_ = true; + return *this; + } + static_batch_value_ = value; + return *this; + } + + bool MergeIfCompatible(const ClusterBatchSize& b) { + bool is_compatible = MergeIfCompatible(b.static_batch_value_); + if (!is_compatible) return false; + + if (!HasDynamicBatchValue() && b.HasDynamicBatchValue()) { + has_dynamic_batch_value_ = true; + } + + return true; + } + + // Returns a string for the batch size value. If the object has a static + // batch size value, return a string for the value. If the object has a + // dynamic size value, return -1. Otherwise, returns -2 to represent that + // a batch size hasn't been set yet. + string ToString() const { + string s; + absl::StrAppendFormat(&s, "batch_size=(%d,%d,", HasDynamicBatchValue(), + HasStaticBatchValue()); + if (HasStaticBatchValue()) { + absl::StrAppendFormat(&s, "%d", GetStaticBatchValue()); + } + absl::StrAppend(&s, ")"); + return s; + } + + private: + bool HasStaticBatchValue() const { return static_batch_value_.has_value(); } + bool HasDynamicBatchValue() const { return has_dynamic_batch_value_; } + + private: + bool MergeIfCompatible(const absl::optional& b) { + bool is_compatible = !HasStaticBatchValue() || !b.has_value() || + GetStaticBatchValue() == b.value(); + if (!is_compatible) { + return false; + } + if (!HasStaticBatchValue() && b.has_value()) { + static_batch_value_ = b; + } + return true; + } + + private: + // To track whether the cluster has any node with dynamic batch size. + bool has_dynamic_batch_value_; + // To track whether the cluster has any node with static batch size, and the + // unique value for static batch size. + absl::optional static_batch_value_; +}; + +inline std::ostream& operator<<(std::ostream& os, + const ClusterBatchSize& batch_size) { + return os << batch_size.ToString(); +} + +// Represents a disjoint set of copyable values with type T. We use this data +// structure to construct clusters for TRTEngineOp. As such, this data structure +// has a field to record the batch size for the current cluster and merges the +// corresponding batch sizes when merging two clusters. Most of the methods in +// this class are side-effecting as they also compress the path from the object +// to the parent of its containing set. template class UnionFind { public: UnionFind() : size_(1), parent_(nullptr) {} - explicit UnionFind(const T& v) : size_(1), parent_(nullptr), value_(v) {} + explicit UnionFind(const T& v, ClusterBatchSize batch_size) + : size_(1), + cluster_batch_size_(batch_size), + parent_(nullptr), + value_(v) {} - // Returns the number of elements in a cluster. 
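The compatibility rule behind MergeIfCompatible above boils down to: all static batch sizes in a cluster must agree on one value, and the dynamic flag of the merged cluster is the OR of the two sides. Below is a minimal standalone model of that rule, assuming C++17 for std::optional; BatchSizeModel is an illustrative name, not the class in this header.

#include <cstdio>
#include <optional>

// Simplified model: a cluster may contain any number of nodes with dynamic
// batch size, but every node with a static batch size must agree on a single
// value.
struct BatchSizeModel {
  bool has_dynamic = false;
  std::optional<int> static_value;

  bool MergeIfCompatible(const BatchSizeModel& other) {
    if (static_value.has_value() && other.static_value.has_value() &&
        *static_value != *other.static_value) {
      return false;  // two different static batch sizes cannot be clustered
    }
    if (!static_value.has_value()) static_value = other.static_value;
    has_dynamic = has_dynamic || other.has_dynamic;
    return true;
  }
};

int main() {
  BatchSizeModel a{false, 2};            // e.g. a1[2,3] + a2[2,3]
  BatchSizeModel b{true, std::nullopt};  // e.g. b1[-1,3] + b2[-1,3]
  BatchSizeModel c{false, 5};            // e.g. c1[5,3] + c2[5,3]
  std::printf("merge(static 2, dynamic)  -> %s\n",
              a.MergeIfCompatible(b) ? "ok" : "rejected");
  std::printf("merge(static 2, static 5) -> %s\n",
              a.MergeIfCompatible(c) ? "ok" : "rejected");
  return 0;
}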
+ // Returns the number of elements in the cluster and compresses the path from + // this object to the root of the cluster. int Size() { return FindRoot()->size_; } - // Merges this cluster with 'other'. This cluster's value becomes - // the value of the merged cluster; the value of 'other' is ignored. - void Merge(UnionFind* other); + // Returns the batch size of the cluster and compress the path from this + // object to the root object. + ClusterBatchSize BatchSize() { return FindRoot()->cluster_batch_size_; } - // Each cluster has an associated value. Retrieves the value associated - // with this cluster. + // Merges this cluster with 'other'. This cluster's size_ is updated to + // the size of the merged cluster; the size_ of 'other' becomes inaccessible + // as only the size_ of the root object is accessible. + Status Merge(UnionFind* other); + + // Retrieves the value for the root of the cluster. T& ParentValue() { return FindRoot()->value_; } - // Get the original value of this node. + // Returns the value for the object. T& Value() { return value_; } private: - // Finds the root element of the cluster. Performs path compression. + // Returns the root object for the cluster and compresses the path from this + // object to the root object. UnionFind* FindRoot(); int size_; + ClusterBatchSize cluster_batch_size_; UnionFind* parent_; T value_; }; template -void UnionFind::Merge(UnionFind* other) { +Status UnionFind::Merge(UnionFind* other) { UnionFind* a = FindRoot(); UnionFind* b = other->FindRoot(); - if (a == b) return; + if (a == b) return Status::OK(); + ClusterBatchSize batch_size = a->cluster_batch_size_; + bool merged = batch_size.MergeIfCompatible(other->cluster_batch_size_); + if (!merged) { + return errors::Internal("trying to merge incompatible cluster."); + } + + a->cluster_batch_size_ = batch_size; b->parent_ = a; a->size_ += b->size_; + return Status::OK(); } template @@ -76,4 +217,7 @@ UnionFind* UnionFind::FindRoot() { } // namespace tensorrt } // namespace tensorflow +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA + #endif // TENSORFLOW_COMPILER_TF2TENSORRT_SEGMENT_UNION_FIND_H_ diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index a5332385994..55341c0a01f 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -81,7 +81,7 @@ tf_portable_proto_library( name = "portable_tf2xla_proto", config_string = "allow_all:true", header_outs = ["//tensorflow/compiler/tf2xla/tf2xla.proto.h"], - portable_deps = ["//tensorflow/core:portable_proto_lib_full_runtime"], + portable_deps = ["//tensorflow/core:portable_proto_lib"], proto_deps = [ ":tf2xla_proto", "//tensorflow/core:protos_all", @@ -182,6 +182,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/strings", "@llvm-project//mlir:IR", + "@llvm-project//mlir:Shape", "@llvm-project//mlir:StandardOps", ], ) @@ -703,12 +704,8 @@ cc_library( deps = [ "//tensorflow/compiler/mlir:mlir_graph_optimization_pass", "//tensorflow/compiler/mlir/tensorflow", - "//tensorflow/compiler/mlir/tensorflow:convert_graphdef", - "//tensorflow/compiler/mlir/tensorflow:device_util", - "//tensorflow/compiler/mlir/tensorflow:dump_mlir_util", - "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", "//tensorflow/core:core_cpu", - "@com_google_absl//absl/container:flat_hash_set", + "//tensorflow/core:lib", "@llvm-project//llvm:support", ], alwayslink = 1, diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 
4780bd7455e..bfdfe38305b 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -103,6 +103,7 @@ tf_kernel_library( "spacetodepth_op.cc", "sparse_to_dense_op.cc", "split_op.cc", + "spmd_manual_sharding_ops.cc", "stack_ops.cc", "stateful_random_ops.cc", "stateless_random_ops.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc index bb2c0d9ddb8..5dbc083368c 100644 --- a/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/dynamic_slice_ops.cc @@ -28,6 +28,15 @@ limitations under the License. namespace tensorflow { namespace { +absl::InlinedVector SliceVector(xla::XlaOp input, int64 rank) { + absl::InlinedVector scalar_indices; + scalar_indices.reserve(rank); + for (int i = 0; i < rank; i++) + scalar_indices.push_back( + xla::Reshape(xla::Slice(input, {i}, {i + 1}, {1}), {})); + return scalar_indices; +} + class DynamicUpdateSliceOp : public XlaOpKernel { public: explicit DynamicUpdateSliceOp(OpKernelConstruction* context) @@ -41,21 +50,23 @@ class DynamicUpdateSliceOp : public XlaOpKernel { const TensorShape update_shape = ctx->InputShape("update"); const TensorShape index_shape = ctx->InputShape("indices"); + int64 rank = input_shape.dims(); OP_REQUIRES( ctx, TensorShapeUtils::IsVector(index_shape) && - index_shape.num_elements() == input_shape.dims(), + index_shape.num_elements() == rank, errors::InvalidArgument("index must be a vector with length equal to " "the number of input dimensions")); OP_REQUIRES( - ctx, input_shape.dims() == update_shape.dims(), + ctx, rank == update_shape.dims(), errors::InvalidArgument("input and update must have the same rank," " input shape is ", input_shape.DebugString(), "; update shape is ", update_shape.DebugString())); + xla::XlaOp indices = ctx->Input("indices"); xla::XlaOp result = xla::DynamicUpdateSlice( - ctx->Input("input"), ctx->Input("update"), ctx->Input("indices")); + ctx->Input("input"), ctx->Input("update"), SliceVector(indices, rank)); ctx->SetOutput(0, result); } }; @@ -76,17 +87,18 @@ class DynamicSliceOp : public XlaOpKernel { const TensorShape start_indices_shape = ctx->InputShape("start_indices"); const TensorShape size_indices_shape = ctx->InputShape("size_indices"); + int64 rank = input_shape.dims(); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(start_indices_shape) && - start_indices_shape.num_elements() == input_shape.dims(), + start_indices_shape.num_elements() == rank, errors::InvalidArgument( "start_indices must be a vector with length equal to " "input rank, but input rank is ", - input_shape.dims(), " and start_indices has shape ", + rank, " and start_indices has shape ", start_indices_shape.DebugString())); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(size_indices_shape) && - size_indices_shape.num_elements() == input_shape.dims(), + size_indices_shape.num_elements() == rank, errors::InvalidArgument( "size_indices must be a vector with length equal to " "input rank, but input rank is ", @@ -96,8 +108,10 @@ class DynamicSliceOp : public XlaOpKernel { std::vector size_indices; OP_REQUIRES_OK( ctx, ctx->ConstantInputAsIntVector("size_indices", &size_indices)); + + xla::XlaOp start_indices = ctx->Input("start_indices"); xla::XlaOp result = xla::DynamicSlice( - ctx->Input("input"), ctx->Input("start_indices"), size_indices); + ctx->Input("input"), SliceVector(start_indices, rank), size_indices); ctx->SetOutput(0, result); } }; diff --git 
a/tensorflow/compiler/tf2xla/kernels/slice_op.cc b/tensorflow/compiler/tf2xla/kernels/slice_op.cc index 17d0b87edda..7f274c6b00f 100644 --- a/tensorflow/compiler/tf2xla/kernels/slice_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/slice_op.cc @@ -42,19 +42,17 @@ class SliceOp : public XlaOpKernel { const TensorShape begin_tensor_shape = ctx->InputShape(1); const TensorShape size_tensor_shape = ctx->InputShape(2); + const int input_dims = input_shape.dims(); OP_REQUIRES( ctx, TensorShapeUtils::IsVector(begin_tensor_shape) && TensorShapeUtils::IsVector(size_tensor_shape) && - begin_tensor_shape.num_elements() == input_shape.dims() && - size_tensor_shape.num_elements() == input_shape.dims(), + begin_tensor_shape.num_elements() == input_dims && + size_tensor_shape.num_elements() == input_dims, errors::InvalidArgument( "Expected begin and size arguments to be 1-D tensors of size ", - input_shape.dims(), ", but got shapes ", - begin_tensor_shape.DebugString(), " and ", - size_tensor_shape.DebugString(), " instead.")); - - const int input_dims = input_shape.dims(); + input_dims, ", but got shapes ", begin_tensor_shape.DebugString(), + " and ", size_tensor_shape.DebugString(), " instead.")); std::vector begin; std::vector size; @@ -129,7 +127,15 @@ class SliceOp : public XlaOpKernel { input_shape.dim_size(i), "], but ", "got ", size[i])); } - ctx->SetOutput(0, xla::DynamicSlice(ctx->Input(0), ctx->Input(1), size)); + + absl::InlinedVector scalar_indices; + scalar_indices.reserve(input_dims); + xla::XlaOp begin = ctx->Input("begin"); + for (int i = 0; i < input_dims; i++) + scalar_indices.push_back( + xla::Reshape(xla::Slice(begin, {i}, {i + 1}, {1}), {})); + + ctx->SetOutput(0, xla::DynamicSlice(ctx->Input(0), scalar_indices, size)); } } }; diff --git a/tensorflow/compiler/tf2xla/kernels/spmd_manual_sharding_ops.cc b/tensorflow/compiler/tf2xla/kernels/spmd_manual_sharding_ops.cc new file mode 100644 index 00000000000..cd28fe8fa3f --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/spmd_manual_sharding_ops.cc @@ -0,0 +1,147 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +namespace { + +class XlaSpmdFullToShardShapeOp : public XlaOpKernel { + public: + explicit XlaSpmdFullToShardShapeOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("manual_sharding", &manual_sharding_str_)); + } + + ~XlaSpmdFullToShardShapeOp() override = default; + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaOp input = ctx->Input(0); + auto input_shape_or = ctx->InputXlaShape(0); + OP_REQUIRES_OK(ctx, input_shape_or.status()); + xla::OpSharding sharding; + if (!sharding.ParseFromString(manual_sharding_str_)) { + OP_REQUIRES_OK(ctx, + xla::InvalidArgument("manual_sharding attribute was not a " + "valid encoded xla::OpSharding " + "proto.")); + } + auto output_shape = input_shape_or.ValueOrDie(); + int64 rank = output_shape.rank(); + if (sharding.type() == xla::OpSharding::OTHER) { + for (int64 i = 0; i < rank; ++i) { + int64 partitions_i = sharding.tile_assignment_dimensions(i); + if (partitions_i == 1) continue; + int64 dim_size = + xla::CeilOfRatio(output_shape.dimensions(i), partitions_i); + output_shape.set_dimensions(i, dim_size); + } + } + xla::XlaOp input_annotation; + { + // Annotate the full-shape input with the manual sharding. + xla::XlaScopedShardingAssignment assign_sharding(ctx->builder(), + sharding); + input_annotation = + xla::CustomCall(ctx->builder(), /*call_target_name=*/"Sharding", + {input}, input_shape_or.ValueOrDie()); + } + + { + // Annotate the shard-shape output with replicated sharding, so that the + // partitioner will leave it as is. 
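XlaSpmdFullToShardShapeOp above shrinks every tiled dimension to CeilOfRatio(dim, partitions) while leaving untiled dimensions unchanged. The standalone sketch below shows just that arithmetic; ShardShape is an illustrative helper, not part of the XLA API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Each dimension split over N partitions shrinks to ceil(dim / N); dimensions
// assigned a single partition keep their size.
std::vector<int64_t> ShardShape(const std::vector<int64_t>& full_shape,
                                const std::vector<int64_t>& partitions) {
  std::vector<int64_t> shard = full_shape;
  for (size_t i = 0; i < shard.size(); ++i) {
    if (partitions[i] == 1) continue;
    shard[i] = (full_shape[i] + partitions[i] - 1) / partitions[i];  // ceil
  }
  return shard;
}

int main() {
  // A [9, 8] tensor tiled over a 2x4 device mesh yields a [5, 2] shard per
  // device; the dimension of size 9 is padded up to 10 before splitting, which
  // is why the op's documentation says the padding region is masked with 0s.
  const std::vector<int64_t> shard = ShardShape({9, 8}, {2, 4});
  std::printf("shard shape: [%lld, %lld]\n",
              static_cast<long long>(shard[0]),
              static_cast<long long>(shard[1]));
  return 0;
}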
+ xla::OpSharding replicated; + replicated.set_type(xla::OpSharding::REPLICATED); + xla::XlaScopedShardingAssignment assign_sharding(ctx->builder(), + replicated); + auto output = xla::CustomCall(ctx->builder(), + /*call_target_name=*/"SPMDFullToShardShape", + {input_annotation}, output_shape); + ctx->SetOutput(0, output); + } + } + + private: + string manual_sharding_str_; + TF_DISALLOW_COPY_AND_ASSIGN(XlaSpmdFullToShardShapeOp); +}; + +class XlaSpmdShardToFullShapeOp : public XlaOpKernel { + public: + explicit XlaSpmdShardToFullShapeOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("full_shape", &full_shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("manual_sharding", &manual_sharding_str_)); + } + + ~XlaSpmdShardToFullShapeOp() override = default; + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaOp input = ctx->Input(0); + auto input_shape_or = ctx->InputXlaShape(0); + OP_REQUIRES_OK(ctx, input_shape_or.status()); + auto output_shape = TensorShapeToXLAShape( + input_shape_or.ValueOrDie().element_type(), full_shape_); + + xla::OpSharding sharding; + if (!sharding.ParseFromString(manual_sharding_str_)) { + OP_REQUIRES_OK(ctx, + xla::InvalidArgument("manual_sharding attribute was not a " + "valid encoded xla::OpSharding " + "proto.")); + } + xla::XlaOp input_annotation; + { + // Annotate the shard-shape input with replicated sharding, so that the + // partitioner will leave it as is. + xla::OpSharding replicated; + replicated.set_type(xla::OpSharding::REPLICATED); + xla::XlaScopedShardingAssignment assign_sharding(ctx->builder(), + replicated); + input_annotation = + xla::CustomCall(ctx->builder(), /*call_target_name=*/"Sharding", + {input}, input_shape_or.ValueOrDie()); + } + + { + // Annotate the full-shape output with the manual sharding. + xla::XlaScopedShardingAssignment assign_sharding(ctx->builder(), + sharding); + ctx->SetOutput( + 0, xla::CustomCall(ctx->builder(), + /*call_target_name=*/"SPMDShardToFullShape", + {input_annotation}, output_shape)); + } + } + + private: + TensorShape full_shape_; + string manual_sharding_str_; + TF_DISALLOW_COPY_AND_ASSIGN(XlaSpmdShardToFullShapeOp); +}; + +REGISTER_XLA_OP(Name("XlaSpmdFullToShardShape"), XlaSpmdFullToShardShapeOp); +REGISTER_XLA_OP(Name("XlaSpmdShardToFullShape"), XlaSpmdShardToFullShapeOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc b/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc index 6d0d569724f..c398e5f129e 100644 --- a/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc +++ b/tensorflow/compiler/tf2xla/mlir_bridge_pass.cc @@ -18,10 +18,18 @@ limitations under the License. #include #include "tensorflow/compiler/mlir/tensorflow/transforms/bridge.h" +#include "tensorflow/core/lib/monitoring/gauge.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { +auto* mlir_bridge_gauge_v1 = monitoring::Gauge::New( + "/tensorflow/config/experimental/enable_mlir_bridge_gauge_v1", + "Tracks usage of the MLIR-based TF2XLA bridge among TF1 models"); +auto* mlir_bridge_gauge_v2 = monitoring::Gauge::New( + "/tensorflow/config/experimental/enable_mlir_bridge_gauge_v2", + "Tracks usage of the MLIR-based TF2XLA bridge among TF2 models"); + // This runs the first phase of the "bridge", transforming the graph in a form // that can be executed with delegation of some computations to an accelerator. 
// This builds on the model of XLA where a subset of the graph is encapsulated @@ -31,11 +39,13 @@ namespace tensorflow { Status MlirBridgePass::Run(const ConfigProto& config_proto, mlir::ModuleOp module) { if (!config_proto.experimental().enable_mlir_bridge()) { - VLOG(1) << "Skipping MLIR Bridge Pass, session flag not enabled"; + VLOG(0) << "Skipping MLIR TPU Bridge, session flag not enabled"; + mlir_bridge_gauge_v2->GetCell()->Set(false); return Status::OK(); } - VLOG(1) << "Running MLIR Bridge Pass"; + VLOG(0) << "Running MLIR TPU Bridge"; + mlir_bridge_gauge_v2->GetCell()->Set(true); TF_RETURN_IF_ERROR( mlir::TFTPU::TPUBridge(module, /*enable_logging=*/VLOG_IS_ON(1))); @@ -47,11 +57,13 @@ Status MlirBridgeV1CompatPass::Run(const GraphOptimizationPassOptions& options, if (options.is_function_graph) return Status::OK(); if (!options.session_options->config.experimental().enable_mlir_bridge()) { - VLOG(1) << "Skipping MLIR Bridge V1 Compat Pass, session flag not enabled"; + VLOG(0) << "Skipping MLIR TPU Bridge V1 Compat, session flag not enabled"; + mlir_bridge_gauge_v1->GetCell()->Set(false); return Status::OK(); } - VLOG(1) << "Running MLIR Bridge V1 Compat Pass"; + VLOG(0) << "Running MLIR TPU Bridge V1 Compat"; + mlir_bridge_gauge_v1->GetCell()->Set(true); TF_RETURN_IF_ERROR( mlir::TFTPU::TPUBridgeV1Compat(module, /*enable_logging=*/VLOG_IS_ON(1))); diff --git a/tensorflow/compiler/tf2xla/mlir_tf2xla.cc b/tensorflow/compiler/tf2xla/mlir_tf2xla.cc index daf261fa5d8..43793be56a7 100644 --- a/tensorflow/compiler/tf2xla/mlir_tf2xla.cc +++ b/tensorflow/compiler/tf2xla/mlir_tf2xla.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" @@ -95,6 +96,7 @@ static void RegisterDialects() { mlir::registerDialect(); mlir::registerDialect(); mlir::registerDialect(); + mlir::registerDialect(); return true; }(); (void)init_once; diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index c0bf423a644..862da1f3f95 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -648,6 +648,62 @@ This op has better TPU performance since it doesn't have explicitly reshape and transpose operations as tf.einsum does. 
)doc"); +REGISTER_OP("XlaSpmdFullToShardShape") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .Attr("manual_sharding: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + auto input_handle = c->input(0); + if (!c->RankKnown(input_handle)) { + return shape_inference::UnknownShape(c); + } + string sharding_attr; + TF_RETURN_IF_ERROR(c->GetAttr("manual_sharding", &sharding_attr)); + std::vector dims; + for (int64 i = 0; i < c->Rank(input_handle); ++i) { + auto dim = c->Value(c->Dim(input_handle, i)); + xla::OpSharding sharding; + sharding.ParseFromString(sharding_attr); + int64 partitions_i = sharding.tile_assignment_dimensions(i); + if (dim != shape_inference::InferenceContext::kUnknownDim && + sharding.type() == xla::OpSharding::OTHER && partitions_i != 1) { + dim = (dim + partitions_i - 1) / partitions_i; + } + dims.push_back(c->MakeDim(dim)); + } + c->set_output(0, c->MakeShape(dims)); + return Status::OK(); + }) + .Doc(R"doc( +An op used by XLA SPMD partitioner to switch from automatic partitioning to +manual partitioning. It annotates the input (full-shape, to be automatically +partitioned) with the same sharding used by manual partitioning, and outputs a +shard-shaped tensor to be consumed by later manually-partitioned ops. If the +shape is not evenly partitionable, the padding region will be masked with 0s. +)doc"); + +REGISTER_OP("XlaSpmdShardToFullShape") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .Attr("manual_sharding: string") + .Attr("full_shape: shape") + .SetShapeFn([](shape_inference::InferenceContext* c) { + TensorShape shape_attr; + TF_RETURN_IF_ERROR(c->GetAttr("full_shape", &shape_attr)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(shape_attr, &s)); + c->set_output(0, s); + return Status::OK(); + }) + .Doc(R"doc( +An op used by XLA SPMD partitioner to switch from manual partitioning to +automatic partitioning. It converts the shard-shaped, manually partitioned input +into full-shaped tensor to be partitioned automatically with the same sharding +used by manual partitioning. 
+)doc"); + REGISTER_OP("XlaSharding") .Input("input: T") .Output("output: T") diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index 0df61da57a3..c59c47e92fb 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -418,6 +418,26 @@ def _sharding_grad(op, grad): return [grad] +spmd_full_to_shard_shape = gen_xla_ops.xla_spmd_full_to_shard_shape +spmd_shard_to_full_shape = gen_xla_ops.xla_spmd_shard_to_full_shape + + +@ops.RegisterGradient("XlaSpmdFullToShardShape") +def _spmd_full_to_shard_shape_grad(op, grad): + s2f = gen_xla_ops.xla_spmd_shard_to_full_shape( + grad, + manual_sharding=op.get_attr("manual_sharding"), + full_shape=op.inputs[0].shape.as_list()) + return [s2f] + + +@ops.RegisterGradient("XlaSpmdShardToFullShape") +def _spmd_shard_to_full_shape_grad(op, grad): + f2s = gen_xla_ops.xla_spmd_full_to_shard_shape( + grad, manual_sharding=op.get_attr("manual_sharding")) + return [f2s] + + sort = gen_xla_ops.xla_sort key_value_sort = gen_xla_ops.xla_key_value_sort while_loop = gen_xla_ops.xla_while diff --git a/tensorflow/compiler/tf2xla/xla_expression.cc b/tensorflow/compiler/tf2xla/xla_expression.cc index 0aa139ce4f0..49f108ed6c8 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.cc +++ b/tensorflow/compiler/tf2xla/xla_expression.cc @@ -121,6 +121,9 @@ xla::StatusOr> XlaExpression::ResolveConstant( handle().builder()->IsConstant(handle())); if (!is_constant) return {absl::nullopt}; + if (!client) + return errors::InvalidArgument("client is required to resolve constant"); + TF_ASSIGN_OR_RETURN(xla::XlaComputation constant_graph, handle().builder()->BuildConstantSubGraph( handle(), dynamic_dimension_is_minus_one)); diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index a394de1a9e8..2c6edf5389e 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -175,8 +175,9 @@ Status XlaOpKernelContext::ConstantInputReshaped( int index, absl::Span new_dims, xla::Literal* constant_literal) { XlaExpression e = InputExpression(index); + auto* client = compiler() ? 
compiler()->client() : nullptr; xla::StatusOr> constant_or_status = - e.ResolveConstant(compiler()->client(), dynamic_dimension_is_minus_one_); + e.ResolveConstant(client, dynamic_dimension_is_minus_one_); if (!constant_or_status.ok()) { Status status = constant_or_status.status(); errors::AppendToMessage(&status, "while evaluating input ", index, " of ", diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 1350f9e3e0b..45f49cee328 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -17,7 +17,6 @@ package_group( "//tensorflow/compiler/...", "//tensorflow/python/tpu/...", "//third_party/py/jax/...", - "//third_party/tf_runtime/tools/tf_kernel_gen/...", ], ) @@ -332,6 +331,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:regexp_internal", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/strings", diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index cd52e2f5e45..404f9eb7519 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -70,6 +70,12 @@ ExecutableBuildOptions& ExecutableBuildOptions::set_num_partitions( return *this; } +ExecutableBuildOptions& ExecutableBuildOptions::set_use_spmd_partitioning( + bool use_spmd_partitioning) { + use_spmd_partitioning_ = use_spmd_partitioning; + return *this; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_device_assignment( const DeviceAssignment& device_assignment) { device_assignment_ = device_assignment; diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 360ad0260df..9a7fdd974b1 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -77,6 +77,11 @@ class ExecutableBuildOptions { int num_partitions() const { return num_partitions_; } ExecutableBuildOptions& set_num_partitions(int num_partitions); + // Indicates whether to use SPMD (true) or MPMD (false) partitioning when + // num_partitions > 1 and XLA is requested to partition the input program. + bool use_spmd_partitioning() const { return use_spmd_partitioning_; } + ExecutableBuildOptions& set_use_spmd_partitioning(bool use_spmd_partitioning); + // If set, this specifies a static device assignment for the computation. 
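The use_spmd_partitioning option introduced above only matters when num_partitions > 1 and XLA is asked to partition the program. A minimal, illustrative sketch of a client opting into SPMD rather than MPMD partitioning (the helper function and partition count are placeholders, not part of this change):

#include "tensorflow/compiler/xla/client/executable_build_options.h"

xla::ExecutableBuildOptions MakeSpmdBuildOptions(int num_partitions) {
  xla::ExecutableBuildOptions options;
  // Partition the program across `num_partitions` devices...
  options.set_num_partitions(num_partitions);
  // ...and ask for the SPMD partitioner rather than MPMD to do it.
  options.set_use_spmd_partitioning(true);
  return options;
}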
// Otherwise, the computation will be compiled generically and can be run with // any device assignment compatible with the computation's replica and @@ -104,6 +109,7 @@ class ExecutableBuildOptions { se::DeviceMemoryAllocator* device_allocator_ = nullptr; int num_replicas_ = 1; int num_partitions_ = 1; + bool use_spmd_partitioning_ = false; absl::optional device_assignment_; bool alias_passthrough_params_ = false; }; diff --git a/tensorflow/compiler/xla/client/lib/math_test.cc b/tensorflow/compiler/xla/client/lib/math_test.cc index 32796dd8d70..9b8156efe5b 100644 --- a/tensorflow/compiler/xla/client/lib/math_test.cc +++ b/tensorflow/compiler/xla/client/lib/math_test.cc @@ -298,6 +298,15 @@ XLA_TEST_F(MathTest, SqrtSixValues) { ComputeAndCompareR1(&builder, expected, {}, error_spec_); } +XLA_TEST_F(MathTest, CbrtSixValues) { + XlaBuilder builder(TestName()); + auto x = ConstantR1(&builder, {8.0, 1.0, 4096.0, -64.0, 1.728, 1331}); + Cbrt(x); + + std::vector expected = {2, 1, 16, -4, 1.2, 11}; + ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.001)); +} + XLA_TEST_F(MathTest, SinhSmallValues) { XlaBuilder builder(TestName()); auto x = ConstantR1(&builder, {1e-3, 1e-5, 1e-7, 1e-9, 1e-11}); diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 7de4cd4b3c7..a4e5b936153 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -860,34 +860,10 @@ XlaOp XlaBuilder::SliceInDim(XlaOp operand, int64 start_index, }); } -XlaOp XlaBuilder::DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes) { - return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); - TF_ASSIGN_OR_RETURN(const Shape* start_indices_shape, - GetShapePtr(start_indices)); - TF_ASSIGN_OR_RETURN( - Shape shape, ShapeInference::InferDynamicSliceShape( - *operand_shape, {*start_indices_shape}, slice_sizes)); - *instr.mutable_shape() = shape.ToProto(); - - for (int64 size : slice_sizes) { - instr.add_dynamic_slice_sizes(size); - } - - return AddInstruction(std::move(instr), HloOpcode::kDynamicSlice, - {operand, start_indices}); - }); -} - XlaOp XlaBuilder::DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); std::vector start_indices_shape_ptrs; TF_ASSIGN_OR_RETURN(const auto& start_indices_shapes, @@ -898,43 +874,28 @@ XlaOp XlaBuilder::DynamicSlice(XlaOp operand, TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferDynamicSliceShape( *operand_shape, start_indices_shapes, slice_sizes)); - *instr.mutable_shape() = shape.ToProto(); - - for (int64 size : slice_sizes) { - instr.add_dynamic_slice_sizes(size); - } - - std::vector operands = {operand}; - operands.insert(operands.end(), start_indices.begin(), start_indices.end()); - return AddInstruction(std::move(instr), HloOpcode::kDynamicSlice, operands); + return DynamicSliceInternal(shape, operand, start_indices, slice_sizes); }); } -XlaOp XlaBuilder::DynamicUpdateSlice(XlaOp operand, XlaOp update, - XlaOp start_indices) { - return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; +StatusOr XlaBuilder::DynamicSliceInternal( + const Shape& shape, XlaOp operand, absl::Span start_indices, + absl::Span slice_sizes) { + HloInstructionProto instr; + *instr.mutable_shape() = 
shape.ToProto(); - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); - TF_ASSIGN_OR_RETURN(const Shape* update_shape, GetShapePtr(update)); - TF_ASSIGN_OR_RETURN(const Shape* start_indices_shape, - GetShapePtr(start_indices)); - TF_ASSIGN_OR_RETURN( - Shape shape, - ShapeInference::InferDynamicUpdateSliceShape( - *operand_shape, *update_shape, {*start_indices_shape})); - *instr.mutable_shape() = shape.ToProto(); + for (int64 size : slice_sizes) { + instr.add_dynamic_slice_sizes(size); + } - return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, - {operand, update, start_indices}); - }); + std::vector operands = {operand}; + operands.insert(operands.end(), start_indices.begin(), start_indices.end()); + return AddInstruction(std::move(instr), HloOpcode::kDynamicSlice, operands); } XlaOp XlaBuilder::DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); TF_ASSIGN_OR_RETURN(const Shape* update_shape, GetShapePtr(update)); std::vector start_indices_shape_ptrs; @@ -946,15 +907,22 @@ XlaOp XlaBuilder::DynamicUpdateSlice(XlaOp operand, XlaOp update, TF_ASSIGN_OR_RETURN( Shape shape, ShapeInference::InferDynamicUpdateSliceShape( *operand_shape, *update_shape, start_indices_shapes)); - *instr.mutable_shape() = shape.ToProto(); - - std::vector operands = {operand, update}; - operands.insert(operands.end(), start_indices.begin(), start_indices.end()); - return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, - operands); + return DynamicUpdateSliceInternal(shape, operand, update, start_indices); }); } +StatusOr XlaBuilder::DynamicUpdateSliceInternal( + const Shape& shape, XlaOp operand, XlaOp update, + absl::Span start_indices) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + + std::vector operands = {operand, update}; + operands.insert(operands.end(), start_indices.begin(), start_indices.end()); + return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, + operands); +} + XlaOp XlaBuilder::ConcatInDim(absl::Span operands, int64 dimension) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -1301,7 +1269,6 @@ XlaOp XlaBuilder::ConvGeneralDilated( int64 feature_group_count, int64 batch_group_count, const PrecisionConfig* precision_config) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; TF_ASSIGN_OR_RETURN(const Shape* lhs_shape, GetShapePtr(lhs)); TF_ASSIGN_OR_RETURN(const Shape* rhs_shape, GetShapePtr(rhs)); TF_RETURN_IF_ERROR( @@ -1314,30 +1281,45 @@ XlaOp XlaBuilder::ConvGeneralDilated( window_dimensions[i] = rhs_shape->dimensions(dimension_numbers.kernel_spatial_dimensions(i)); } - TF_ASSIGN_OR_RETURN(*instr.mutable_window(), + + TF_ASSIGN_OR_RETURN(Window window, ShapeInference::InferWindowFromDimensions( window_dimensions, window_strides, padding, lhs_dilation, rhs_dilation)); - - TF_ASSIGN_OR_RETURN( - Shape shape, ShapeInference::InferConvolveShape( - *lhs_shape, *rhs_shape, feature_group_count, - batch_group_count, instr.window(), dimension_numbers)); - *instr.mutable_shape() = shape.ToProto(); - - *instr.mutable_convolution_dimension_numbers() = dimension_numbers; - instr.set_feature_group_count(feature_group_count); - instr.set_batch_group_count(batch_group_count); - - if (precision_config != nullptr) { - *instr.mutable_precision_config() = *precision_config; - } - - return 
AddInstruction(std::move(instr), HloOpcode::kConvolution, - {lhs, rhs}); + TF_ASSIGN_OR_RETURN(Shape shape, + ShapeInference::InferConvolveShape( + *lhs_shape, *rhs_shape, feature_group_count, + batch_group_count, window, dimension_numbers)); + return ConvGeneralDilatedInternal(shape, lhs, rhs, window, window_strides, + padding, lhs_dilation, rhs_dilation, + dimension_numbers, feature_group_count, + batch_group_count, precision_config); }); } +StatusOr XlaBuilder::ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, absl::Span rhs_dilation, + const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + + *instr.mutable_window() = window; + *instr.mutable_convolution_dimension_numbers() = dimension_numbers; + instr.set_feature_group_count(feature_group_count); + instr.set_batch_group_count(batch_group_count); + + if (precision_config != nullptr) { + *instr.mutable_precision_config() = *precision_config; + } + + return AddInstruction(std::move(instr), HloOpcode::kConvolution, {lhs, rhs}); +} + XlaOp XlaBuilder::Fft(XlaOp operand, const FftType fft_type, const absl::Span fft_length) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -1792,8 +1774,6 @@ XlaOp XlaBuilder::RngOp(RandomDistribution distribution, absl::Span parameters, const Shape& shape) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - // Check the number of parameters per RNG distribution. switch (distribution) { case RandomDistribution::RNG_NORMAL: @@ -1809,14 +1789,20 @@ XlaOp XlaBuilder::RngOp(RandomDistribution distribution, } TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(shape)); - *instr.mutable_shape() = shape.ToProto(); - - instr.set_distribution(distribution); - - return AddInstruction(std::move(instr), HloOpcode::kRng, parameters); + return RngOpInternal(distribution, parameters, shape); }); } +StatusOr XlaBuilder::RngOpInternal(RandomDistribution distribution, + absl::Span parameters, + const Shape& shape) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + instr.set_distribution(distribution); + + return AddInstruction(std::move(instr), HloOpcode::kRng, parameters); +} + XlaOp XlaBuilder::RngNormal(XlaOp mu, XlaOp sigma, const Shape& shape) { return RngOp(RandomDistribution::RNG_NORMAL, {mu, sigma}, shape); } @@ -2199,6 +2185,39 @@ XlaOp XlaBuilder::BatchNormGrad(XlaOp operand, XlaOp scale, XlaOp batch_mean, }); } +XlaOp XlaBuilder::AllGather(XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout) { + return ReportErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); + + TF_ASSIGN_OR_RETURN(Shape inferred_shape, + ShapeInference::InferAllGatherShape( + *operand_shape, all_gather_dimension, shard_count)); + if (layout) { + *inferred_shape.mutable_layout() = *layout; + instr.set_constrain_layout(true); + } + *instr.mutable_shape() = inferred_shape.ToProto(); + + instr.add_dimensions(all_gather_dimension); + for (const ReplicaGroup& group : replica_groups) { + *instr.add_replica_groups() = group; + } + if (channel_id.has_value()) { + instr.set_channel_id(channel_id->handle()); + } + + 
TF_ASSIGN_OR_RETURN( + auto all_gather, + AddInstruction(std::move(instr), HloOpcode::kAllGather, {operand})); + return all_gather; + }); +} + XlaOp XlaBuilder::CrossReplicaSum( XlaOp operand, absl::Span replica_groups) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -3101,20 +3120,11 @@ XlaOp SliceInDim(const XlaOp operand, int64 start_index, int64 limit_index, stride, dimno); } -XlaOp DynamicSlice(const XlaOp operand, const XlaOp start_indices, - absl::Span slice_sizes) { - return operand.builder()->DynamicSlice(operand, start_indices, slice_sizes); -} XlaOp DynamicSlice(const XlaOp operand, absl::Span start_indices, absl::Span slice_sizes) { return operand.builder()->DynamicSlice(operand, start_indices, slice_sizes); } -XlaOp DynamicUpdateSlice(const XlaOp operand, const XlaOp update, - const XlaOp start_indices) { - return operand.builder()->DynamicUpdateSlice(operand, update, start_indices); -} - XlaOp DynamicUpdateSlice(const XlaOp operand, const XlaOp update, absl::Span start_indices) { return operand.builder()->DynamicUpdateSlice(operand, update, start_indices); @@ -3466,6 +3476,16 @@ XlaOp ReduceWindowWithGeneralPadding( base_dilations, window_dilations, padding); } +XlaOp AllGather(const XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout) { + return operand.builder()->AllGather(operand, all_gather_dimension, + shard_count, replica_groups, channel_id, + layout); +} + XlaOp CrossReplicaSum(const XlaOp operand, absl::Span replica_groups) { return operand.builder()->CrossReplicaSum(operand, replica_groups); @@ -3571,6 +3591,9 @@ XlaOp Imag(const XlaOp operand) { XlaOp Sqrt(const XlaOp operand) { return operand.builder()->UnaryOp(HloOpcode::kSqrt, operand); } +XlaOp Cbrt(const XlaOp operand) { + return operand.builder()->UnaryOp(HloOpcode::kCbrt, operand); +} XlaOp Rsqrt(const XlaOp operand) { return operand.builder()->UnaryOp(HloOpcode::kRsqrt, operand); } diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 64424b9dd3c..b631514248c 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -421,16 +421,17 @@ class XlaBuilder { virtual XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, int64 stride, int64 dimno); - ABSL_DEPRECATED("Use span-of-indices form instead") - XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes); + virtual StatusOr DynamicSliceInternal( + const Shape& shape, XlaOp operand, absl::Span start_indices, + absl::Span slice_sizes); - ABSL_DEPRECATED("Use span-of-indices form instead") - XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, XlaOp start_indices); XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); + virtual StatusOr DynamicUpdateSliceInternal( + const Shape& shape, XlaOp operand, XlaOp update, + absl::Span start_indices); XlaOp ConcatInDim(absl::Span operands, int64 dimension); virtual StatusOr ConcatInDimInternal(const Shape& shape, @@ -491,6 +492,16 @@ class XlaBuilder { int64 batch_group_count = 1, const PrecisionConfig* precision_config = nullptr); + virtual StatusOr ConvGeneralDilatedInternal( + const Shape& shape, XlaOp lhs, XlaOp rhs, const Window& window, + absl::Span window_strides, + absl::Span> padding, + absl::Span lhs_dilation, + absl::Span rhs_dilation, + 
const ConvolutionDimensionNumbers& dimension_numbers, + int64 feature_group_count, int64 batch_group_count, + const PrecisionConfig* precision_config); + XlaOp Fft(XlaOp operand, FftType fft_type, absl::Span fft_length); @@ -549,6 +560,12 @@ class XlaBuilder { XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups = {}); + XlaOp AllGather( + XlaOp operand, int64 all_gather_dimension, int64 shard_count, + absl::Span replica_groups = {}, + const absl::optional& channel_id = absl::nullopt, + const absl::optional& layout = absl::nullopt); + XlaOp AllReduce( XlaOp operand, const XlaComputation& computation, absl::Span replica_groups = {}, @@ -707,6 +724,10 @@ class XlaBuilder { XlaOp RngOp(RandomDistribution distribution, absl::Span parameters, const Shape& shape); + virtual StatusOr RngOpInternal(RandomDistribution distribution, + absl::Span parameters, + const Shape& shape); + virtual StatusOr InDimBroadcast( const Shape& shape, XlaOp operand, absl::Span broadcast_dimensions); @@ -838,14 +859,10 @@ class XlaBuilder { friend XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, int64 stride, int64 dimno); - friend XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); friend XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes); - friend XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, - XlaOp start_indices); friend XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); @@ -988,6 +1005,11 @@ class XlaBuilder { absl::Span> padding); friend XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups); + friend XlaOp AllGather(XlaOp operand, int64 all_gather_dimension, + int64 shard_count, + absl::Span replica_groups, + const absl::optional& channel_id, + const absl::optional& layout); friend XlaOp AllReduce(XlaOp operand, const XlaComputation& computation, absl::Span replica_groups, const absl::optional& channel_id, @@ -1030,6 +1052,7 @@ class XlaBuilder { friend XlaOp Imag(XlaOp operand); friend XlaOp Sqrt(XlaOp operand); friend XlaOp Rsqrt(XlaOp operand); + friend XlaOp Cbrt(XlaOp operand); friend XlaOp Pow(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions); friend XlaOp IsFinite(XlaOp operand); @@ -1412,10 +1435,6 @@ XlaOp SliceInDim(XlaOp operand, int64 start_index, int64 limit_index, XlaOp DynamicSlice(XlaOp operand, absl::Span start_indices, absl::Span slice_sizes); -ABSL_DEPRECATED("Use span-of-indices form instead") -XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, - absl::Span slice_sizes); - // Enqueues a dynamic update slice operation onto the computation, which // updates a slice of 'operand' with 'update' at dynamic 'start_indices'. // The shape of 'update' determines the shape of the slice of 'operand' @@ -1436,9 +1455,6 @@ XlaOp DynamicSlice(XlaOp operand, XlaOp start_indices, XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, absl::Span start_indices); -ABSL_DEPRECATED("Use span-of-indices form instead") -XlaOp DynamicUpdateSlice(XlaOp operand, XlaOp update, XlaOp start_indices); - // Enqueues a concatenate instruction onto the computation. 'operands' must // have >= 1 entry. 
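With the ABSL_DEPRECATED single-operand start_indices overloads of DynamicSlice and DynamicUpdateSlice removed above, callers pass one scalar index operand per dimension instead of a single R1 operand. A minimal migration sketch (the function name, operand rank, and index values are illustrative only):

#include "tensorflow/compiler/xla/client/xla_builder.h"

namespace xla {

// Takes a 2x2 dynamic slice of a rank-2 operand starting at (0, 2).
XlaOp DynamicSliceExample(XlaBuilder* b, XlaOp operand) {
  // Was (removed overload): DynamicSlice(operand, ConstantR1<int32>(b, {0, 2}), {2, 2});
  XlaOp row_start = ConstantR0<int32>(b, 0);
  XlaOp col_start = ConstantR0<int32>(b, 2);
  return DynamicSlice(operand, {row_start, col_start}, /*slice_sizes=*/{2, 2});
}

}  // namespace xla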
XlaOp ConcatInDim(XlaBuilder* builder, absl::Span operands, @@ -1766,6 +1782,11 @@ XlaOp ReduceWindowWithGeneralPadding( XlaOp CrossReplicaSum(XlaOp operand, absl::Span replica_groups = {}); +XlaOp AllGather(XlaOp operand, int64 all_gather_dimension, int64 shard_count, + absl::Span replica_groups = {}, + const absl::optional& channel_id = absl::nullopt, + const absl::optional& layout = absl::nullopt); + // Enqueues an operation that do an AllReduce of the operand cross cores. Here // AllReduce means doing a reduction on the input operand cross cores and then // broadcasting the reduction result to those cores. The reduction function is @@ -1884,6 +1905,9 @@ XlaOp Imag(XlaOp operand); // Enqueues a sqrt computation onto the computation. XlaOp Sqrt(XlaOp operand); +// Enqueues a cbrt computation onto the computation. +XlaOp Cbrt(XlaOp operand); + // Enqueues a rsqrt computation onto the computation. XlaOp Rsqrt(XlaOp operand); diff --git a/tensorflow/compiler/xla/client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_builder_test.cc index 1fa839b2014..e1733cd179c 100644 --- a/tensorflow/compiler/xla/client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_builder_test.cc @@ -381,6 +381,18 @@ TEST_F(XlaBuilderTest, Transpose) { EXPECT_THAT(root, op::Transpose(op::Parameter())); } +TEST_F(XlaBuilderTest, AllGather) { + XlaBuilder b(TestName()); + auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x"); + AllGather(x, /*all_gather_dimension=*/1, /*shard_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + + EXPECT_EQ(root->opcode(), HloOpcode::kAllGather); + EXPECT_TRUE( + ShapeUtil::Equal(root->shape(), ShapeUtil::MakeShape(F32, {4, 64}))); +} + TEST_F(XlaBuilderTest, AllToAll) { XlaBuilder b(TestName()); auto x = Parameter(&b, 0, ShapeUtil::MakeShape(F32, {4, 16}), "x"); diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index e6d60e51e75..60a563ee956 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -64,6 +64,9 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_force_host_platform_device_count(1); opts.set_xla_gpu_deterministic_reductions(false); opts.set_xla_cpu_enable_xprof_traceme(true); + // TODO(b/155295372): disable ptxas fallback by default. + opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(true); + opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_error(false); return opts; } @@ -219,340 +222,347 @@ static void AllocateFlags() { return true; }; - flag_objects = new std::vector({ - tensorflow::Flag( - "xla_cpu_enable_fast_math", - bool_setter_for(&DebugOptions::set_xla_cpu_enable_fast_math), - flag_values->xla_cpu_enable_fast_math(), - "Enable unsafe fast-math optimizations in the CPU compiler; " - "this may produce faster code at the expense of some accuracy."), - tensorflow::Flag( - "xla_cpu_fast_math_honor_nans", - bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_nans), - flag_values->xla_cpu_fast_math_honor_nans(), - "When xla_cpu_enable_fast_math is true then this controls whether we " - "allow operations to produce NaNs. 
Ignored when " - "xla_cpu_enable_fast_math is false."), - tensorflow::Flag( - "xla_cpu_fast_math_honor_infs", - bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_infs), - flag_values->xla_cpu_fast_math_honor_infs(), - "When xla_cpu_enable_fast_math is true then this controls whether we " - "allow operations to produce infinites. Ignored when " - "xla_cpu_enable_fast_math is false."), - tensorflow::Flag( - "xla_cpu_fast_math_honor_division", - bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_division), - flag_values->xla_cpu_fast_math_honor_division(), - "When xla_cpu_enable_fast_math is true then this controls whether " - "we forbid to use multiplication by the reciprocal instead of " - "division. Ignored when xla_cpu_enable_fast_math is false."), - tensorflow::Flag( - "xla_cpu_fast_math_honor_functions", - bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_functions), - flag_values->xla_cpu_fast_math_honor_functions(), - "When xla_cpu_enable_fast_math is true then this controls whether " - "we forbid to approximate calculations for functions. Ignored when " - "xla_cpu_enable_fast_math is false."), - tensorflow::Flag( - "xla_gpu_enable_fast_min_max", - bool_setter_for(&DebugOptions::set_xla_gpu_enable_fast_min_max), - flag_values->xla_gpu_enable_fast_min_max(), - "Enable fast floating point min/max lowering that does not propagate " - "NaNs."), - tensorflow::Flag( - "xla_llvm_enable_alias_scope_metadata", - bool_setter_for( - &DebugOptions::set_xla_llvm_enable_alias_scope_metadata), - flag_values->xla_llvm_enable_alias_scope_metadata(), - "In LLVM-based backends, enable the emission of " - "!alias.scope metadata in the generated IR."), - tensorflow::Flag( - "xla_llvm_enable_noalias_metadata", - bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata), - flag_values->xla_llvm_enable_noalias_metadata(), - "In LLVM-based backends, enable the emission of " - "!noalias metadata in the generated IR."), - tensorflow::Flag( - "xla_llvm_enable_invariant_load_metadata", - bool_setter_for( - &DebugOptions::set_xla_llvm_enable_invariant_load_metadata), - flag_values->xla_llvm_enable_invariant_load_metadata(), - "In LLVM-based backends, enable the emission of " - "!invariant.load metadata in " - "the generated IR."), - tensorflow::Flag( - "xla_llvm_disable_expensive_passes", - bool_setter_for(&DebugOptions::set_xla_llvm_disable_expensive_passes), - flag_values->xla_llvm_disable_expensive_passes(), - "In LLVM-based backends, disable a custom set of " - "expensive optimization passes."), - tensorflow::Flag( - "xla_backend_optimization_level", - int32_setter_for(&DebugOptions::set_xla_backend_optimization_level), - flag_values->xla_backend_optimization_level(), - "Numerical optimization level for the XLA compiler backend."), - tensorflow::Flag( - "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "", - "Comma-separated list of hlo passes to be disabled. These names " - "must exactly match the passes' names; no whitespace around " - "commas."), - tensorflow::Flag( - "xla_enable_hlo_passes_only", setter_for_xla_enable_hlo_passes_only, - "", - "Comma-separated list of hlo passes to be enabled. These names " - "must exactly match the passes' names; no whitespace around " - "commas. The unspecified passes are all disabled."), - tensorflow::Flag( - "xla_disable_all_hlo_passes", - bool_setter_for(&DebugOptions::set_xla_disable_all_hlo_passes), false, - "Disables all HLO passes. 
Notes that some passes are necessary for " - "correctness and the invariants that must be satisfied by 'fully " - "optimized' HLO are different for different devices and may change " - "over time. The only 'guarantee', such as it is, is that if you " - "compile XLA and dump the optimized HLO for some graph, you should " - "be able to run it again on the same device with the same build of " - "XLA."), - tensorflow::Flag( - "xla_embed_ir_in_executable", - bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable), - flag_values->xla_embed_ir_in_executable(), - "Embed the compiler IR as a string in the executable."), - tensorflow::Flag( - "xla_eliminate_hlo_implicit_broadcast", - bool_setter_for( - &DebugOptions::set_xla_eliminate_hlo_implicit_broadcast), - flag_values->xla_eliminate_hlo_implicit_broadcast(), - "Eliminate implicit broadcasts when lowering user " - "computations to HLO instructions; use explicit " - "broadcast instead."), - tensorflow::Flag( - "xla_cpu_multi_thread_eigen", - bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen), - flag_values->xla_cpu_multi_thread_eigen(), - "When generating calls to Eigen in the CPU backend, " - "use multi-threaded Eigen mode."), - tensorflow::Flag("xla_gpu_cuda_data_dir", - flag_values->mutable_xla_gpu_cuda_data_dir(), - "If non-empty, specifies a local directory containing " - "ptxas and nvvm libdevice files; otherwise we use " - "those from runfile directories."), - tensorflow::Flag("xla_gpu_ftz", - bool_setter_for(&DebugOptions::set_xla_gpu_ftz), - flag_values->xla_gpu_ftz(), - "If true, flush-to-zero semantics are enabled in the " - "code generated for GPUs."), - tensorflow::Flag( - "xla_gpu_disable_multi_streaming", - bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming), - flag_values->xla_gpu_disable_multi_streaming(), - "If true, multi-streaming in the GPU backend is disabled."), - tensorflow::Flag( - "xla_gpu_max_kernel_unroll_factor", - int32_setter_for(&DebugOptions::set_xla_gpu_max_kernel_unroll_factor), - flag_values->xla_gpu_max_kernel_unroll_factor(), - "Specify the maximum kernel unroll factor for the GPU backend."), - tensorflow::Flag("xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "", - "If non-empty, specifies a file containing ptx to use. " - "The filename prefix must have the same pattern as PTX " - "dumped by XLA. This allows to match one specific " - "module. General workflow. Get the generated module " - "ptx from XLA. Modify it. Then pass it back via this " - "option."), - tensorflow::Flag( - "xla_test_all_output_layouts", - bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts), - flag_values->xla_test_all_output_layouts(), - "Let ClientLibraryTestBase::ComputeAndCompare* test " - "all permutations of output layouts. For example, with " - "a 3D shape, all permutations of the set {0, 1, 2} are " - "tried."), - tensorflow::Flag( - "xla_test_all_input_layouts", - bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts), - flag_values->xla_test_all_input_layouts(), - "Let ClientLibraryTestBase::ComputeAndCompare* test " - "all permutations of *input* layouts. For example, for " - "2 input arguments with 2D shape and 4D shape, the " - "computation will run 2! * 4! 
times for every possible " - "layouts"), - tensorflow::Flag( - "xla_hlo_profile", - bool_setter_for(&DebugOptions::set_xla_hlo_profile), - flag_values->xla_hlo_profile(), - "Instrument the computation to collect per-HLO cycle counts"), - tensorflow::Flag("xla_backend_extra_options", - setter_for_xla_backend_extra_options, "", - "Extra options to pass to a backend; " - "comma-separated list of 'key=val' strings (=val " - "may be omitted); no whitespace around commas."), - tensorflow::Flag( - "xla_gpu_use_cudnn_batchnorm", - bool_setter_for(&DebugOptions::set_xla_gpu_use_cudnn_batchnorm), - flag_values->xla_gpu_use_cudnn_batchnorm(), - "Allows the GPU backend to implement batchnorm HLOs using cudnn, " - "rather than expanding them to a soup of HLOs."), + flag_objects = new std::vector(); + flag_objects->reserve(55); + // Don't use an initializer list for initializing the vector; this would + // create a temporary copy, and exceeds the stack space when compiling with + // certain configurations. + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_enable_fast_math", + bool_setter_for(&DebugOptions::set_xla_cpu_enable_fast_math), + flag_values->xla_cpu_enable_fast_math(), + "Enable unsafe fast-math optimizations in the CPU compiler; this may " + "produce faster code at the expense of some accuracy.")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_fast_math_honor_nans", + bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_nans), + flag_values->xla_cpu_fast_math_honor_nans(), + "When xla_cpu_enable_fast_math is true then this controls whether we " + "allow operations to produce NaNs. Ignored when " + "xla_cpu_enable_fast_math is false.")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_fast_math_honor_infs", + bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_infs), + flag_values->xla_cpu_fast_math_honor_infs(), + "When xla_cpu_enable_fast_math is true then this controls whether we " + "allow operations to produce infinites. Ignored when " + "xla_cpu_enable_fast_math is false.")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_fast_math_honor_division", + bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_division), + flag_values->xla_cpu_fast_math_honor_division(), + "When xla_cpu_enable_fast_math is true then this controls whether we " + "forbid to use multiplication by the reciprocal instead of division. " + "Ignored when xla_cpu_enable_fast_math is false.")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_fast_math_honor_functions", + bool_setter_for(&DebugOptions::set_xla_cpu_fast_math_honor_functions), + flag_values->xla_cpu_fast_math_honor_functions(), + "When xla_cpu_enable_fast_math is true then this controls whether we " + "forbid to approximate calculations for functions. 
Ignored when " + "xla_cpu_enable_fast_math is false.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_enable_fast_min_max", + bool_setter_for(&DebugOptions::set_xla_gpu_enable_fast_min_max), + flag_values->xla_gpu_enable_fast_min_max(), + "Enable fast floating point min/max lowering that does not propagate " + "NaNs.")); + flag_objects->push_back(tensorflow::Flag( + "xla_llvm_enable_alias_scope_metadata", + bool_setter_for(&DebugOptions::set_xla_llvm_enable_alias_scope_metadata), + flag_values->xla_llvm_enable_alias_scope_metadata(), + "In LLVM-based backends, enable the emission of !alias.scope metadata in " + "the generated IR.")); + flag_objects->push_back(tensorflow::Flag( + "xla_llvm_enable_noalias_metadata", + bool_setter_for(&DebugOptions::set_xla_llvm_enable_noalias_metadata), + flag_values->xla_llvm_enable_noalias_metadata(), + "In LLVM-based backends, enable the emission of !noalias metadata in the " + "generated IR.")); + flag_objects->push_back(tensorflow::Flag( + "xla_llvm_enable_invariant_load_metadata", + bool_setter_for( + &DebugOptions::set_xla_llvm_enable_invariant_load_metadata), + flag_values->xla_llvm_enable_invariant_load_metadata(), + "In LLVM-based backends, enable the emission of !invariant.load metadata " + "in the generated IR.")); + flag_objects->push_back(tensorflow::Flag( + "xla_llvm_disable_expensive_passes", + bool_setter_for(&DebugOptions::set_xla_llvm_disable_expensive_passes), + flag_values->xla_llvm_disable_expensive_passes(), + "In LLVM-based backends, disable a custom set of expensive optimization " + "passes.")); + flag_objects->push_back(tensorflow::Flag( + "xla_backend_optimization_level", + int32_setter_for(&DebugOptions::set_xla_backend_optimization_level), + flag_values->xla_backend_optimization_level(), + "Numerical optimization level for the XLA compiler backend.")); + flag_objects->push_back(tensorflow::Flag( + "xla_disable_hlo_passes", setter_for_xla_disable_hlo_passes, "", + "Comma-separated list of hlo passes to be disabled. These names must " + "exactly match the passes' names; no whitespace around commas.")); + flag_objects->push_back(tensorflow::Flag( + "xla_enable_hlo_passes_only", setter_for_xla_enable_hlo_passes_only, "", + "Comma-separated list of hlo passes to be enabled. These names must " + "exactly match the passes' names; no whitespace around commas. The " + "unspecified passes are all disabled.")); + flag_objects->push_back(tensorflow::Flag( + "xla_disable_all_hlo_passes", + bool_setter_for(&DebugOptions::set_xla_disable_all_hlo_passes), false, + "Disables all HLO passes. Notes that some passes are necessary for " + "correctness and the invariants that must be satisfied by 'fully " + "optimized' HLO are different for different devices and may change " + "over time. 
The only 'guarantee', such as it is, is that if you compile " + "XLA and dump the optimized HLO for some graph, you should be able to " + "run it again on the same device with the same build of XLA.")); + flag_objects->push_back(tensorflow::Flag( + "xla_embed_ir_in_executable", + bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable), + flag_values->xla_embed_ir_in_executable(), + "Embed the compiler IR as a string in the executable.")); + flag_objects->push_back(tensorflow::Flag( + "xla_eliminate_hlo_implicit_broadcast", + bool_setter_for(&DebugOptions::set_xla_eliminate_hlo_implicit_broadcast), + flag_values->xla_eliminate_hlo_implicit_broadcast(), + "Eliminate implicit broadcasts when lowering user computations to HLO " + "instructions; use explicit broadcast instead.")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_multi_thread_eigen", + bool_setter_for(&DebugOptions::set_xla_cpu_multi_thread_eigen), + flag_values->xla_cpu_multi_thread_eigen(), + "When generating calls to Eigen in the CPU backend, use multi-threaded " + "Eigen mode.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_cuda_data_dir", flag_values->mutable_xla_gpu_cuda_data_dir(), + "If non-empty, specifies a local directory containing ptxas and nvvm " + "libdevice files; otherwise we use those from runfile directories.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_ftz", bool_setter_for(&DebugOptions::set_xla_gpu_ftz), + flag_values->xla_gpu_ftz(), + "If true, flush-to-zero semantics are enabled in the code generated for " + "GPUs.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_disable_multi_streaming", + bool_setter_for(&DebugOptions::set_xla_gpu_disable_multi_streaming), + flag_values->xla_gpu_disable_multi_streaming(), + "If true, multi-streaming in the GPU backend is disabled.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_max_kernel_unroll_factor", + int32_setter_for(&DebugOptions::set_xla_gpu_max_kernel_unroll_factor), + flag_values->xla_gpu_max_kernel_unroll_factor(), + "Specify the maximum kernel unroll factor for the GPU backend.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "", + "If non-empty, specifies a file containing ptx to use. The filename " + "prefix must have the same pattern as PTX dumped by XLA. This allows to " + "match one specific module. General workflow. Get the generated module " + "ptx from XLA. Modify it. Then pass it back via this option.")); + flag_objects->push_back(tensorflow::Flag( + "xla_test_all_output_layouts", + bool_setter_for(&DebugOptions::set_xla_test_all_output_layouts), + flag_values->xla_test_all_output_layouts(), + "Let ClientLibraryTestBase::ComputeAndCompare* test all permutations of " + "output layouts. For example, with a 3D shape, all permutations of the " + "set {0, 1, 2} are tried.")); + flag_objects->push_back(tensorflow::Flag( + "xla_test_all_input_layouts", + bool_setter_for(&DebugOptions::set_xla_test_all_input_layouts), + flag_values->xla_test_all_input_layouts(), + "Let ClientLibraryTestBase::ComputeAndCompare* test all permutations of " + "*input* layouts. For example, for 2 input arguments with 2D shape and " + "4D shape, the computation will run 2! * 4! 
times for every possible " + "layouts")); + flag_objects->push_back(tensorflow::Flag( + "xla_hlo_profile", bool_setter_for(&DebugOptions::set_xla_hlo_profile), + flag_values->xla_hlo_profile(), + "Instrument the computation to collect per-HLO cycle counts")); + flag_objects->push_back(tensorflow::Flag( + "xla_backend_extra_options", setter_for_xla_backend_extra_options, "", + "Extra options to pass to a backend; comma-separated list of 'key=val' " + "strings (=val may be omitted); no whitespace around commas.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_use_cudnn_batchnorm", + bool_setter_for(&DebugOptions::set_xla_gpu_use_cudnn_batchnorm), + flag_values->xla_gpu_use_cudnn_batchnorm(), + "Allows the GPU backend to implement batchnorm HLOs using cudnn, rather " + "than expanding them to a soup of HLOs.")); + flag_objects->push_back( tensorflow::Flag("xla_cpu_use_mkl_dnn", bool_setter_for(&DebugOptions::set_xla_cpu_use_mkl_dnn), flag_values->xla_cpu_use_mkl_dnn(), - "Generate calls to MKL-DNN in the CPU backend."), - tensorflow::Flag( - "xla_gpu_crash_on_verification_failures", - bool_setter_for( - &DebugOptions::set_xla_gpu_crash_on_verification_failures), - flag_values->xla_gpu_crash_on_verification_failures(), - "Crashes the program on extra verification failures, e.g. cuDNN " - "cross checking failures"), - tensorflow::Flag( - "xla_gpu_autotune_level", - int32_setter_for(&DebugOptions::set_xla_gpu_autotune_level), - flag_values->xla_gpu_autotune_level(), - "Set GEMM and Convolution auto-tuning level." - "0 = off; 1 = on; 2 = on+init; 3 = on+init+reinit; 4 = " - "on+init+reinit+check."), - tensorflow::Flag( - "xla_force_host_platform_device_count", - int32_setter_for( - &DebugOptions::set_xla_force_host_platform_device_count), - flag_values->xla_force_host_platform_device_count(), - "Force the host platform to pretend that there are these many " - "host \"devices\". All of these host devices are backed by the same" - "threadpool. Setting this to anything other than 1 can increase " - "overhead from context switching but we let the user override this " - "behavior to help run tests on the host that run models in parallel " - "across multiple devices."), - tensorflow::Flag( - "xla_gpu_disable_gpuasm_optimizations", - bool_setter_for( - &DebugOptions::set_xla_gpu_disable_gpuasm_optimizations), - flag_values->xla_gpu_disable_gpuasm_optimizations(), - "In XLA:GPU run ptxas in -O0 (default is -O3)."), - tensorflow::Flag( - "xla_fuel", setter_for_xla_fuel, /*default_value_for_display=*/"", - "Sets compiler fuel, useful for bisecting bugs in passes. Format " - "--xla_fuel=PASS1=NUM1,PASS2=NUM2,..."), - - tensorflow::Flag( - "xla_dump_to", string_setter_for(&DebugOptions::set_xla_dump_to), - flag_values->xla_dump_to(), - "Directory into which debugging data is written. If not specified " - "but another dumping flag is passed, data will be written to stdout. " - " To explicitly write to stdout, set this to \"-\". The values " - "\"sponge\" and \"test_undeclared_outputs_dir\" have a special " - "meaning: They cause us to dump into the directory specified by the " - "environment variable TEST_UNDECLARED_OUTPUTS_DIR."), - tensorflow::Flag( - "xla_dump_hlo_as_text", - bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_text), - flag_values->xla_dump_hlo_as_text(), - "Dumps HLO modules as text before and after optimizations. 
Results " - "are written to the --xla_dump_to dir, or, if no dir is specified, " - "to stdout."), - tensorflow::Flag( - "xla_dump_hlo_as_proto", - bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_proto), - flag_values->xla_dump_hlo_as_proto(), - "Dumps HLO modules as HloProtos to the directory specified by " - "--xla_dump_to."), - tensorflow::Flag( - "xla_dump_hlo_as_dot", - bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_dot), - flag_values->xla_dump_hlo_as_dot(), - "Dumps HLO modules rendered as dot files to the directory " - "specified by --xla_dump_to."), + "Generate calls to MKL-DNN in the CPU backend.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_crash_on_verification_failures", + bool_setter_for( + &DebugOptions::set_xla_gpu_crash_on_verification_failures), + flag_values->xla_gpu_crash_on_verification_failures(), + "Crashes the program on extra verification failures, e.g. cuDNN cross " + "checking failures")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_autotune_level", + int32_setter_for(&DebugOptions::set_xla_gpu_autotune_level), + flag_values->xla_gpu_autotune_level(), + "Set GEMM and Convolution auto-tuning level. 0 = off; 1 = on; 2 = " + "on+init; 3 = on+init+reinit; 4 = on+init+reinit+check.")); + flag_objects->push_back(tensorflow::Flag( + "xla_force_host_platform_device_count", + int32_setter_for(&DebugOptions::set_xla_force_host_platform_device_count), + flag_values->xla_force_host_platform_device_count(), + "Force the host platform to pretend that there are these many host " + "\"devices\". All of these host devices are backed by the same " + "threadpool. Setting this to anything other than 1 can increase overhead " + "from context switching but we let the user override this behavior to " + "help run tests on the host that run models in parallel across multiple " + "devices.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_disable_gpuasm_optimizations", + bool_setter_for(&DebugOptions::set_xla_gpu_disable_gpuasm_optimizations), + flag_values->xla_gpu_disable_gpuasm_optimizations(), + "In XLA:GPU run ptxas in -O0 (default is -O3).")); + flag_objects->push_back(tensorflow::Flag( + "xla_fuel", setter_for_xla_fuel, /*default_value_for_display=*/"", + "Sets compiler fuel, useful for bisecting bugs in passes. Format " + "--xla_fuel=PASS1=NUM1,PASS2=NUM2,...")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_to", string_setter_for(&DebugOptions::set_xla_dump_to), + flag_values->xla_dump_to(), + "Directory into which debugging data is written. If not specified but " + "another dumping flag is passed, data will be written to stdout. To " + "explicitly write to stdout, set this to \"-\". The values \"sponge\" " + "and \"test_undeclared_outputs_dir\" have a special meaning: They cause " + "us to dump into the directory specified by the environment variable " + "TEST_UNDECLARED_OUTPUTS_DIR.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_as_text", + bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_text), + flag_values->xla_dump_hlo_as_text(), + "Dumps HLO modules as text before and after optimizations. 
Results are " + "written to the --xla_dump_to dir, or, if no dir is specified, to " + "stdout.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_as_proto", + bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_proto), + flag_values->xla_dump_hlo_as_proto(), + "Dumps HLO modules as HloProtos to the directory specified by " + "--xla_dump_to.")); + flag_objects->push_back( + tensorflow::Flag("xla_dump_hlo_as_dot", + bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_dot), + flag_values->xla_dump_hlo_as_dot(), + "Dumps HLO modules rendered as dot files to the " + "directory specified by --xla_dump_to.")); + flag_objects->push_back( tensorflow::Flag("xla_dump_hlo_as_html", bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_html), flag_values->xla_dump_hlo_as_html(), "Dumps HLO modules rendered as HTML files to the " - "directory specified by --xla_dump_to."), - tensorflow::Flag( - "xla_dump_hlo_as_url", - bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_url), - flag_values->xla_dump_hlo_as_url(), - "Tries to dump HLO modules rendered as URLs to stdout (and also to " - "the directory specified by --xla_dump_to). This is not implemented " - "by default; you need to add a plugin which calls " - "RegisterGraphToURLRenderer()."), - tensorflow::Flag( - "xla_dump_hlo_snapshots", - bool_setter_for(&DebugOptions::set_xla_dump_hlo_snapshots), - flag_values->xla_dump_hlo_snapshots(), - "Every time an HLO module is run, dumps an HloSnapshot to the " - "directory specified by --xla_dump_to."), - tensorflow::Flag( - "xla_dump_hlo_module_re", - string_setter_for(&DebugOptions::set_xla_dump_hlo_module_re), - flag_values->xla_dump_hlo_module_re(), - "Limits dumping only to modules which match this regular expression. " - " Default is to dump all modules."), - tensorflow::Flag( - "xla_dump_hlo_pass_re", - string_setter_for(&DebugOptions::set_xla_dump_hlo_pass_re), - flag_values->xla_dump_hlo_pass_re(), - "If specified, dumps HLO before and after optimization passes which " - "match this regular expression, in addition to dumping at the very " - "beginning and end of compilation."), - tensorflow::Flag( - "xla_dump_include_timestamp", - bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp), - flag_values->xla_dump_include_timestamp(), - "If specified, includes a timestamp in the dumped filenames."), - tensorflow::Flag( - "xla_dump_max_hlo_modules", - int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules), - flag_values->xla_dump_max_hlo_modules(), - "Max number of hlo module dumps in a directory. 
Set to < 0 for " - "unbounded."), - tensorflow::Flag( - "xla_hlo_graph_addresses", - bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses), - flag_values->xla_hlo_graph_addresses(), - "When rendering graphs (--xla_dump_hlo_as_{dot,html,url}), displays " - "the address in memory of each HloInstruction object."), - tensorflow::Flag( - "xla_hlo_graph_sharding_color", - bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color), - flag_values->xla_hlo_graph_sharding_color(), - "Assign colors based on sharding assignments when generating the " - "HLO graphs."), - tensorflow::Flag( - "xla_allow_excess_precision", - bool_setter_for(&DebugOptions::set_xla_allow_excess_precision), - flag_values->xla_allow_excess_precision(), - "Allow xla to increase the output precision of an instruction."), - tensorflow::Flag( - "xla_gpu_force_conv_nchw", - bool_setter_for(&DebugOptions::set_xla_gpu_force_conv_nchw), - flag_values->xla_gpu_force_conv_nchw(), - "For cuDNN convolutions, always NCHW layouts."), - tensorflow::Flag("xla_gpu_algorithm_blacklist_path", - string_setter_for( - &DebugOptions::set_xla_gpu_algorithm_blacklist_path), - flag_values->xla_gpu_algorithm_blacklist_path(), - "An AlgorithmBlacklist text proto file as a blacklist " - "of convolutions to avoid to use."), - tensorflow::Flag( - "xla_gpu_deterministic_reductions", - bool_setter_for(&DebugOptions::set_xla_gpu_deterministic_reductions), - flag_values->xla_gpu_deterministic_reductions(), - "Always run deterministic reductions on GPU"), - tensorflow::Flag( - "xla_tpu_detect_nan", - bool_setter_for(&DebugOptions::set_xla_tpu_detect_nan), - flag_values->xla_tpu_detect_nan(), - "Trigger error on execution on TPU if a NAN value is detected"), - tensorflow::Flag( - "xla_tpu_detect_inf", - bool_setter_for(&DebugOptions::set_xla_tpu_detect_inf), - flag_values->xla_tpu_detect_inf(), - "Trigger error on execution on TPU if a INF value is detected"), - tensorflow::Flag( - "xla_cpu_enable_xprof_traceme", - bool_setter_for(&DebugOptions::set_xla_cpu_enable_xprof_traceme), - flag_values->xla_cpu_enable_xprof_traceme(), - "If true, XLA CPU generates code to call " - "TraceMe::Activity{Start|End} around HLO operations."), - }); + "directory specified by --xla_dump_to.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_as_url", + bool_setter_for(&DebugOptions::set_xla_dump_hlo_as_url), + flag_values->xla_dump_hlo_as_url(), + "Tries to dump HLO modules rendered as URLs to stdout (and also to the " + "directory specified by --xla_dump_to). This is not implemented by " + "default; you need to add a plugin which calls " + "RegisterGraphToURLRenderer().")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_snapshots", + bool_setter_for(&DebugOptions::set_xla_dump_hlo_snapshots), + flag_values->xla_dump_hlo_snapshots(), + "Every time an HLO module is run, dumps an HloSnapshot to the directory " + "specified by --xla_dump_to.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_module_re", + string_setter_for(&DebugOptions::set_xla_dump_hlo_module_re), + flag_values->xla_dump_hlo_module_re(), + "Limits dumping only to modules which match this regular expression. 
" + "Default is to dump all modules.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_hlo_pass_re", + string_setter_for(&DebugOptions::set_xla_dump_hlo_pass_re), + flag_values->xla_dump_hlo_pass_re(), + "If specified, dumps HLO before and after optimization passes which " + "match this regular expression, in addition to dumping at the very " + "beginning and end of compilation.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_include_timestamp", + bool_setter_for(&DebugOptions::set_xla_dump_include_timestamp), + flag_values->xla_dump_include_timestamp(), + "If specified, includes a timestamp in the dumped filenames.")); + flag_objects->push_back(tensorflow::Flag( + "xla_dump_max_hlo_modules", + int32_setter_for(&DebugOptions::set_xla_dump_max_hlo_modules), + flag_values->xla_dump_max_hlo_modules(), + "Max number of hlo module dumps in a directory. Set to < 0 for " + "unbounded.")); + flag_objects->push_back(tensorflow::Flag( + "xla_hlo_graph_addresses", + bool_setter_for(&DebugOptions::set_xla_hlo_graph_addresses), + flag_values->xla_hlo_graph_addresses(), + "When rendering graphs (--xla_dump_hlo_as_{dot,html,url}), displays " + "the address in memory of each HloInstruction object.")); + flag_objects->push_back(tensorflow::Flag( + "xla_hlo_graph_sharding_color", + bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color), + flag_values->xla_hlo_graph_sharding_color(), + "Assign colors based on sharding assignments when generating the HLO " + "graphs.")); + flag_objects->push_back(tensorflow::Flag( + "xla_allow_excess_precision", + bool_setter_for(&DebugOptions::set_xla_allow_excess_precision), + flag_values->xla_allow_excess_precision(), + "Allow xla to increase the output precision of an instruction.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_force_conv_nchw", + bool_setter_for(&DebugOptions::set_xla_gpu_force_conv_nchw), + flag_values->xla_gpu_force_conv_nchw(), + "For cuDNN convolutions, always NCHW layouts.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_algorithm_blacklist_path", + string_setter_for(&DebugOptions::set_xla_gpu_algorithm_blacklist_path), + flag_values->xla_gpu_algorithm_blacklist_path(), + "An AlgorithmBlacklist text proto file as a blacklist of convolutions to " + "avoid to use.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_deterministic_reductions", + bool_setter_for(&DebugOptions::set_xla_gpu_deterministic_reductions), + flag_values->xla_gpu_deterministic_reductions(), + "Always run deterministic reductions on GPU")); + flag_objects->push_back(tensorflow::Flag( + "xla_tpu_detect_nan", + bool_setter_for(&DebugOptions::set_xla_tpu_detect_nan), + flag_values->xla_tpu_detect_nan(), + "Trigger error on execution on TPU if a NAN value is detected")); + flag_objects->push_back(tensorflow::Flag( + "xla_tpu_detect_inf", + bool_setter_for(&DebugOptions::set_xla_tpu_detect_inf), + flag_values->xla_tpu_detect_inf(), + "Trigger error on execution on TPU if a INF value is detected")); + flag_objects->push_back(tensorflow::Flag( + "xla_cpu_enable_xprof_traceme", + bool_setter_for(&DebugOptions::set_xla_cpu_enable_xprof_traceme), + flag_values->xla_cpu_enable_xprof_traceme(), + "If true, XLA CPU generates code to call " + "TraceMe::Activity{Start|End} around HLO operations.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found", + bool_setter_for( + &DebugOptions:: + set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found), + 
flag_values->xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(), + "If true, XLA GPU falls back to the driver if ptxas is not found. Note " + "that falling back to the driver can have drawbacks like using more " + "memory and/or other bugs during compilation, so we recommend setting " + "this flag to false.")); + flag_objects->push_back(tensorflow::Flag( + "xla_gpu_unsafe_fallback_to_driver_on_ptxas_error", + bool_setter_for( + &DebugOptions::set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_error), + flag_values->xla_gpu_unsafe_fallback_to_driver_on_ptxas_error(), + "If true, XLA GPU falls back to the driver if there is an error when " + "running ptxas. Note that falling back to the driver can have drawbacks " + "like using more memory and/or other bugs during compilation, so we " + "recommend setting this flag to false.")); ParseFlagsFromEnvAndDieIfUnknown("XLA_FLAGS", *flag_objects); } diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h index 43ee0fdd820..8ae8c418d5d 100644 --- a/tensorflow/compiler/xla/executable_run_options.h +++ b/tensorflow/compiler/xla/executable_run_options.h @@ -50,6 +50,7 @@ class RunId { public: // Creates a new, unique RunId. RunId(); + explicit RunId(int64 value) : data_(value) {} RunId(const RunId&) = default; RunId& operator=(const RunId&) = default; diff --git a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py index b89bfd68073..212ad87d94c 100644 --- a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py +++ b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py @@ -243,3 +243,54 @@ def split(tensor, tensor, split_dimension, num_devices, input_shape).apply_to_tensor( tensor, assign_tuple_sharding=assign_tuple_sharding) return tensor + + +def get_op_sharding(op): + """Returns sharding attribute of an op. + + Args: + op: a TensorFlow op. + + Returns: + The attribute representing XLA sharding on this op. + """ + return op.get_attr('_XlaSharding') + + +def auto_to_manual_spmd_partition(tensor, manual_sharding): + """Switches from automatic SPMD partitioning to manual partitioning. + + Converts a full-shaped tensor (to be automatically partitioned by SPMD + partitioner) to a shard-shaped tensor to be consumed by manually partitioned + ops. + + Args: + tensor: A tf.Tensor in full shape. + manual_sharding: a serialized string of OpSharding to be used in manual + partitioning. + + Returns: + A shard-shaped tensor to be consumed by manually partitioned ops. + """ + return tf2xla.spmd_full_to_shard_shape( + tensor, manual_sharding=manual_sharding) + + +def manual_to_auto_spmd_partition(tensor, manual_sharding, full_shape): + """Switches from manual partitioning to automatic SPMD partitioning. + + Converts a shard-shaped tensor (manually partitioned in SPMD-style) to a + full-shaped tensor to be partitioned automatically by the SPMD partitioner. + + Args: + tensor: A tf.Tensor in shard shape. + manual_sharding: a serialized string of OpSharding to be used in manual + partitioning. + full_shape: the shape of tensor before partitioning. + + Returns: + A full-shaped tensor to be partitioned automatically by the SPMD + partitioner. 
+ """ + return tf2xla.spmd_shard_to_full_shape( + tensor, manual_sharding=manual_sharding, full_shape=full_shape) diff --git a/tensorflow/compiler/xla/pjrt/BUILD b/tensorflow/compiler/xla/pjrt/BUILD new file mode 100644 index 00000000000..dbd33705d0e --- /dev/null +++ b/tensorflow/compiler/xla/pjrt/BUILD @@ -0,0 +1,213 @@ +load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "worker_thread", + srcs = ["worker_thread.cc"], + hdrs = ["worker_thread.h"], + deps = [ + "//tensorflow/core:lib", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "event_pool", + srcs = ["event_pool.cc"], + hdrs = ["event_pool.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "semaphore", + srcs = ["semaphore.cc"], + hdrs = ["semaphore.h"], + deps = [ + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + ], +) + +tf_cc_test( + name = "semaphore_test", + srcs = ["semaphore_test.cc"], + deps = [ + ":semaphore", + "//tensorflow/compiler/xla:test", + "//tensorflow/core:lib", + "//tensorflow/core:test_main", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "tracked_device_buffer", + srcs = ["tracked_device_buffer.cc"], + hdrs = ["tracked_device_buffer.h"], + deps = [ + ":event_pool", + ":local_device_state", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core:lib", + "//tensorflow/stream_executor:device_memory", + "//tensorflow/stream_executor:device_memory_allocator", + "//tensorflow/stream_executor:event", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/synchronization", + ], +) + +tf_cc_test( + name = "tracked_device_buffer_test", + srcs = ["tracked_device_buffer_test.cc"], + deps = [ + ":tracked_device_buffer", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor:device_memory", + "//tensorflow/stream_executor:device_memory_allocator", + ], +) + +cc_library( + name = "local_device_state", + srcs = ["local_device_state.cc"], + hdrs = ["local_device_state.h"], + deps = [ + ":event_pool", + ":semaphore", + ":worker_thread", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/core:lib", + "//tensorflow/core:stream_executor", + "//tensorflow/stream_executor:event", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "pjrt_client", + srcs = ["pjrt_client.cc"], + hdrs = ["pjrt_client.h"], + visibility = ["//tensorflow/compiler/xla:friends"], + deps = [ + ":event_pool", + ":local_device_state", + ":tracked_device_buffer", + 
"//tensorflow/compiler/xla:cpu_function_runtime", + "//tensorflow/compiler/xla:executable_run_options", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:executable_build_options", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/pjrt/distributed:protocol_proto_cc", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/compiler/xla/service/gpu:gpu_executable_run_options", + "//tensorflow/core:allocator", + "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/stream_executor:event", + "//tensorflow/stream_executor:stream", + "//tensorflow/stream_executor/host:host_platform_id", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "cpu_device", + srcs = ["cpu_device.cc"], + hdrs = ["cpu_device.h"], + deps = [ + ":pjrt_client", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/service:platform_util", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "nvidia_gpu_device", + srcs = ["nvidia_gpu_device.cc"], + hdrs = ["nvidia_gpu_device.h"], + copts = if_cuda(["-DNCCL_ENABLED=1"]), + deps = [ + ":pjrt_client", + "//tensorflow/compiler/xla/service/gpu:gpu_executable_run_options", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/pjrt/distributed:client", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/core/common_runtime:bfc_allocator", + "//tensorflow/core/common_runtime/gpu:gpu_mem_allocator", + "//tensorflow/stream_executor:tf_allocator_adapter", + ] + if_cuda(["@local_config_nccl//:nccl"]), +) + +tf_cc_test( + name = "gpu_multistream_test", + srcs = ["gpu_multistream_test.cc"], + tags = [ + # TODO(phawkins): figure out TF test infra such that this only runs under GPU. 
+ "no_oss", + "requires-gpu-nvidia", + ], + deps = [ + ":nvidia_gpu_device", + ":pjrt_client", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/client:executable_build_options", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:test_main", + "//tensorflow/core/platform:random", + ], +) diff --git a/tensorflow/compiler/xla/python/cpu_device.cc b/tensorflow/compiler/xla/pjrt/cpu_device.cc similarity index 82% rename from tensorflow/compiler/xla/python/cpu_device.cc rename to tensorflow/compiler/xla/pjrt/cpu_device.cc index 12e1e55723b..75c3bfc1277 100644 --- a/tensorflow/compiler/xla/python/cpu_device.cc +++ b/tensorflow/compiler/xla/pjrt/cpu_device.cc @@ -13,8 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/cpu_device.h" +#include "tensorflow/compiler/xla/pjrt/cpu_device.h" +#include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/compiler/xla/service/platform_util.h" @@ -40,8 +41,14 @@ StatusOr> GetCpuClient(bool asynchronous) { std::vector> devices; for (int i = 0; i < client->device_count(); ++i) { - se::StreamExecutor* executor = - client->backend().stream_executor(i).ValueOrDie(); + se::StreamExecutorConfig config; + config.ordinal = i; + // 8MiB stacks seem to be necessary for running LAPACK/OpenBLAS + // computations. + config.device_options.non_portable_tags["host_thread_stack_size_in_bytes"] = + absl::StrCat(8192 * 1024); + TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, + platform->GetExecutor(config)); auto device_state = absl::make_unique( executor, client, LocalDeviceState::kSynchronous, asynchronous, /*allow_event_reuse=*/false); diff --git a/tensorflow/compiler/xla/python/cpu_device.h b/tensorflow/compiler/xla/pjrt/cpu_device.h similarity index 81% rename from tensorflow/compiler/xla/python/cpu_device.h rename to tensorflow/compiler/xla/pjrt/cpu_device.h index 38e81644b1e..c70d90ae228 100644 --- a/tensorflow/compiler/xla/python/cpu_device.h +++ b/tensorflow/compiler/xla/pjrt/cpu_device.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_CPU_DEVICE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_CPU_DEVICE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_CPU_DEVICE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_CPU_DEVICE_H_ #include -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/statusor.h" namespace xla { @@ -32,4 +32,4 @@ StatusOr> GetCpuClient(bool asynchronous); } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_CPU_DEVICE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_CPU_DEVICE_H_ diff --git a/tensorflow/compiler/xla/python/distributed/BUILD b/tensorflow/compiler/xla/pjrt/distributed/BUILD similarity index 100% rename from tensorflow/compiler/xla/python/distributed/BUILD rename to tensorflow/compiler/xla/pjrt/distributed/BUILD diff --git a/tensorflow/compiler/xla/python/distributed/client.cc b/tensorflow/compiler/xla/pjrt/distributed/client.cc similarity index 94% rename from tensorflow/compiler/xla/python/distributed/client.cc rename to tensorflow/compiler/xla/pjrt/distributed/client.cc index c50c3f50a9d..830e512b156 100644 --- a/tensorflow/compiler/xla/python/distributed/client.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/client.cc @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/distributed/client.h" +#include "tensorflow/compiler/xla/pjrt/distributed/client.h" #include // NOLINT -#include "tensorflow/compiler/xla/python/distributed/protocol.h" -#include "tensorflow/compiler/xla/python/distributed/util.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.h" +#include "tensorflow/compiler/xla/pjrt/distributed/util.h" namespace xla { diff --git a/tensorflow/compiler/xla/python/distributed/client.h b/tensorflow/compiler/xla/pjrt/distributed/client.h similarity index 85% rename from tensorflow/compiler/xla/python/distributed/client.h rename to tensorflow/compiler/xla/pjrt/distributed/client.h index 1ab5292bea8..865a752849e 100644 --- a/tensorflow/compiler/xla/python/distributed/client.h +++ b/tensorflow/compiler/xla/pjrt/distributed/client.h @@ -13,15 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_CLIENT_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_CLIENT_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_CLIENT_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_CLIENT_H_ #include #include "grpcpp/channel.h" #include "absl/synchronization/mutex.h" #include "absl/time/time.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.grpc.pb.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.grpc.pb.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/platform/env.h" @@ -47,4 +47,4 @@ class DistributedRuntimeClient { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_CLIENT_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_CLIENT_H_ diff --git a/tensorflow/compiler/xla/python/distributed/client_server_test.cc b/tensorflow/compiler/xla/pjrt/distributed/client_server_test.cc similarity index 95% rename from tensorflow/compiler/xla/python/distributed/client_server_test.cc rename to tensorflow/compiler/xla/pjrt/distributed/client_server_test.cc index e78949933a2..cfe60a06207 100644 --- a/tensorflow/compiler/xla/python/distributed/client_server_test.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/client_server_test.cc @@ -15,10 +15,10 @@ limitations under the License. #include "grpcpp/security/server_credentials.h" #include "absl/time/time.h" +#include "tensorflow/compiler/xla/pjrt/distributed/client.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.pb.h" +#include "tensorflow/compiler/xla/pjrt/distributed/service.h" #include "tensorflow/compiler/xla/protobuf_util.h" -#include "tensorflow/compiler/xla/python/distributed/client.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.pb.h" -#include "tensorflow/compiler/xla/python/distributed/service.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/compiler/xla/python/distributed/distributed.cc b/tensorflow/compiler/xla/pjrt/distributed/distributed.cc similarity index 95% rename from tensorflow/compiler/xla/python/distributed/distributed.cc rename to tensorflow/compiler/xla/pjrt/distributed/distributed.cc index 6afc7b1c4e9..7753e2dcfc7 100644 --- a/tensorflow/compiler/xla/python/distributed/distributed.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/distributed.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/distributed/distributed.h" +#include "tensorflow/compiler/xla/pjrt/distributed/distributed.h" #include "grpcpp/grpcpp.h" diff --git a/tensorflow/compiler/xla/python/distributed/distributed.h b/tensorflow/compiler/xla/pjrt/distributed/distributed.h similarity index 83% rename from tensorflow/compiler/xla/python/distributed/distributed.h rename to tensorflow/compiler/xla/pjrt/distributed/distributed.h index 0475c3e9feb..b3909387259 100644 --- a/tensorflow/compiler/xla/python/distributed/distributed.h +++ b/tensorflow/compiler/xla/pjrt/distributed/distributed.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_DISTRIBUTED_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_DISTRIBUTED_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_DISTRIBUTED_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_DISTRIBUTED_H_ #include #include -#include "tensorflow/compiler/xla/python/distributed/client.h" -#include "tensorflow/compiler/xla/python/distributed/service.h" +#include "tensorflow/compiler/xla/pjrt/distributed/client.h" +#include "tensorflow/compiler/xla/pjrt/distributed/service.h" #include "tensorflow/compiler/xla/statusor.h" namespace xla { @@ -43,4 +43,4 @@ std::shared_ptr GetDistributedRuntimeClient( } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_DISTRIBUTED_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_DISTRIBUTED_H_ diff --git a/tensorflow/compiler/xla/python/distributed/key_value_store.cc b/tensorflow/compiler/xla/pjrt/distributed/key_value_store.cc similarity index 95% rename from tensorflow/compiler/xla/python/distributed/key_value_store.cc rename to tensorflow/compiler/xla/pjrt/distributed/key_value_store.cc index 5966d4ce12b..e989b1384d2 100644 --- a/tensorflow/compiler/xla/python/distributed/key_value_store.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/key_value_store.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/distributed/key_value_store.h" +#include "tensorflow/compiler/xla/pjrt/distributed/key_value_store.h" namespace xla { diff --git a/tensorflow/compiler/xla/python/distributed/key_value_store.h b/tensorflow/compiler/xla/pjrt/distributed/key_value_store.h similarity index 89% rename from tensorflow/compiler/xla/python/distributed/key_value_store.h rename to tensorflow/compiler/xla/pjrt/distributed/key_value_store.h index 8560305e6f6..d496de1feb5 100644 --- a/tensorflow/compiler/xla/python/distributed/key_value_store.h +++ b/tensorflow/compiler/xla/pjrt/distributed/key_value_store.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_KEY_VALUE_STORE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_KEY_VALUE_STORE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_KEY_VALUE_STORE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_KEY_VALUE_STORE_H_ #include "grpcpp/grpcpp.h" #include "absl/base/thread_annotations.h" @@ -50,4 +50,4 @@ class KeyValueStore { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_KEY_VALUE_STORE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_KEY_VALUE_STORE_H_ diff --git a/tensorflow/compiler/xla/python/distributed/protocol.h b/tensorflow/compiler/xla/pjrt/distributed/protocol.h similarity index 80% rename from tensorflow/compiler/xla/python/distributed/protocol.h rename to tensorflow/compiler/xla/pjrt/distributed/protocol.h index 208c6dab8c5..4daa939ac8d 100644 --- a/tensorflow/compiler/xla/python/distributed/protocol.h +++ b/tensorflow/compiler/xla/pjrt/distributed/protocol.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_PROTOCOL_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_PROTOCOL_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_PROTOCOL_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_PROTOCOL_H_ namespace xla { @@ -22,4 +22,4 @@ static constexpr int kDistributedRuntimeProtocolVersion = 1; } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_PROTOCOL_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_PROTOCOL_H_ diff --git a/tensorflow/compiler/xla/python/distributed/protocol.proto b/tensorflow/compiler/xla/pjrt/distributed/protocol.proto similarity index 100% rename from tensorflow/compiler/xla/python/distributed/protocol.proto rename to tensorflow/compiler/xla/pjrt/distributed/protocol.proto diff --git a/tensorflow/compiler/xla/python/distributed/service.cc b/tensorflow/compiler/xla/pjrt/distributed/service.cc similarity index 96% rename from tensorflow/compiler/xla/python/distributed/service.cc rename to tensorflow/compiler/xla/pjrt/distributed/service.cc index cc2b3a5aca2..3325fcd8319 100644 --- a/tensorflow/compiler/xla/python/distributed/service.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/service.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/distributed/service.h" +#include "tensorflow/compiler/xla/pjrt/distributed/service.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.h" -#include "tensorflow/compiler/xla/python/distributed/util.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.h" +#include "tensorflow/compiler/xla/pjrt/distributed/util.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/util.h" diff --git a/tensorflow/compiler/xla/python/distributed/service.h b/tensorflow/compiler/xla/pjrt/distributed/service.h similarity index 91% rename from tensorflow/compiler/xla/python/distributed/service.h rename to tensorflow/compiler/xla/pjrt/distributed/service.h index baf470e4f13..725a76791ce 100644 --- a/tensorflow/compiler/xla/python/distributed/service.h +++ b/tensorflow/compiler/xla/pjrt/distributed/service.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_SERVICE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_SERVICE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_SERVICE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_SERVICE_H_ #include "absl/time/time.h" -#include "tensorflow/compiler/xla/python/distributed/key_value_store.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.grpc.pb.h" +#include "tensorflow/compiler/xla/pjrt/distributed/key_value_store.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.grpc.pb.h" #include "tensorflow/compiler/xla/statusor.h" namespace xla { @@ -98,4 +98,4 @@ void BuildGlobalTopology(absl::Span local_topologies, } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_SERVICE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_SERVICE_H_ diff --git a/tensorflow/compiler/xla/python/distributed/service_test.cc b/tensorflow/compiler/xla/pjrt/distributed/service_test.cc similarity index 91% rename from tensorflow/compiler/xla/python/distributed/service_test.cc rename to tensorflow/compiler/xla/pjrt/distributed/service_test.cc index 08326df2f38..b56dbb17d1a 100644 --- a/tensorflow/compiler/xla/python/distributed/service_test.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/service_test.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/distributed/service.h" +#include "tensorflow/compiler/xla/pjrt/distributed/service.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.pb.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/compiler/xla/python/distributed/util.h b/tensorflow/compiler/xla/pjrt/distributed/util.h similarity index 87% rename from tensorflow/compiler/xla/python/distributed/util.h rename to tensorflow/compiler/xla/pjrt/distributed/util.h index 07ae8d1f0ce..abb2b6089e7 100644 --- a/tensorflow/compiler/xla/python/distributed/util.h +++ b/tensorflow/compiler/xla/pjrt/distributed/util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_UTIL_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_UTIL_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_UTIL_H_ #include "grpcpp/support/status.h" #include "tensorflow/compiler/xla/status.h" @@ -41,4 +41,4 @@ inline ::grpc::Status ToGrpcStatus(const Status& s) { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_DISTRIBUTED_UTIL_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_DISTRIBUTED_UTIL_H_ diff --git a/tensorflow/compiler/xla/python/event_pool.cc b/tensorflow/compiler/xla/pjrt/event_pool.cc similarity index 96% rename from tensorflow/compiler/xla/python/event_pool.cc rename to tensorflow/compiler/xla/pjrt/event_pool.cc index c7b52f523d9..86aa38cdd0f 100644 --- a/tensorflow/compiler/xla/python/event_pool.cc +++ b/tensorflow/compiler/xla/pjrt/event_pool.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/event_pool.h" +#include "tensorflow/compiler/xla/pjrt/event_pool.h" #include "absl/memory/memory.h" #include "absl/synchronization/mutex.h" diff --git a/tensorflow/compiler/xla/python/event_pool.h b/tensorflow/compiler/xla/pjrt/event_pool.h similarity index 95% rename from tensorflow/compiler/xla/python/event_pool.h rename to tensorflow/compiler/xla/pjrt/event_pool.h index bda3fb6baff..47768c28fd9 100644 --- a/tensorflow/compiler/xla/python/event_pool.h +++ b/tensorflow/compiler/xla/pjrt/event_pool.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_EVENT_POOL_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_EVENT_POOL_H_ #include #include @@ -87,4 +87,4 @@ class EventPool { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_EVENT_POOL_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_EVENT_POOL_H_ diff --git a/tensorflow/compiler/xla/python/gpu_multistream_test.cc b/tensorflow/compiler/xla/pjrt/gpu_multistream_test.cc similarity index 97% rename from tensorflow/compiler/xla/python/gpu_multistream_test.cc rename to tensorflow/compiler/xla/pjrt/gpu_multistream_test.cc index bc6ecb14ae2..2db7de3720d 100644 --- a/tensorflow/compiler/xla/python/gpu_multistream_test.cc +++ b/tensorflow/compiler/xla/pjrt/gpu_multistream_test.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/executable_build_options.h" #include "tensorflow/compiler/xla/client/xla_builder.h" -#include "tensorflow/compiler/xla/python/local_client.h" -#include "tensorflow/compiler/xla/python/nvidia_gpu_device.h" +#include "tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/core/platform/random.h" diff --git a/tensorflow/compiler/xla/python/local_device_state.cc b/tensorflow/compiler/xla/pjrt/local_device_state.cc similarity index 98% rename from tensorflow/compiler/xla/python/local_device_state.cc rename to tensorflow/compiler/xla/pjrt/local_device_state.cc index 6a96908cb12..d173c891c95 100644 --- a/tensorflow/compiler/xla/python/local_device_state.cc +++ b/tensorflow/compiler/xla/pjrt/local_device_state.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/local_device_state.h" +#include "tensorflow/compiler/xla/pjrt/local_device_state.h" #include #include diff --git a/tensorflow/compiler/xla/python/local_device_state.h b/tensorflow/compiler/xla/pjrt/local_device_state.h similarity index 96% rename from tensorflow/compiler/xla/python/local_device_state.h rename to tensorflow/compiler/xla/pjrt/local_device_state.h index 5cd2c0014a0..eb25c37878f 100644 --- a/tensorflow/compiler/xla/python/local_device_state.h +++ b/tensorflow/compiler/xla/pjrt/local_device_state.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_LOCAL_DEVICE_STATE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_LOCAL_DEVICE_STATE_H_ #include #include @@ -22,9 +22,9 @@ limitations under the License. #include "absl/synchronization/mutex.h" #include "tensorflow/compiler/xla/client/local_client.h" -#include "tensorflow/compiler/xla/python/event_pool.h" -#include "tensorflow/compiler/xla/python/semaphore.h" -#include "tensorflow/compiler/xla/python/worker_thread.h" +#include "tensorflow/compiler/xla/pjrt/event_pool.h" +#include "tensorflow/compiler/xla/pjrt/semaphore.h" +#include "tensorflow/compiler/xla/pjrt/worker_thread.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/core/platform/stream_executor.h" @@ -207,4 +207,4 @@ class LocalDeviceState { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_DEVICE_STATE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_LOCAL_DEVICE_STATE_H_ diff --git a/tensorflow/compiler/xla/python/nvidia_gpu_device.cc b/tensorflow/compiler/xla/pjrt/nvidia_gpu_device.cc similarity index 99% rename from tensorflow/compiler/xla/python/nvidia_gpu_device.cc rename to tensorflow/compiler/xla/pjrt/nvidia_gpu_device.cc index 886ed697f4e..4863e5e8165 100644 --- a/tensorflow/compiler/xla/python/nvidia_gpu_device.cc +++ b/tensorflow/compiler/xla/pjrt/nvidia_gpu_device.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/python/nvidia_gpu_device.h" +#include "tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h" #ifdef NCCL_ENABLED #include "third_party/nccl/nccl.h" diff --git a/tensorflow/compiler/xla/python/nvidia_gpu_device.h b/tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h similarity index 87% rename from tensorflow/compiler/xla/python/nvidia_gpu_device.h rename to tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h index 2f9922454fa..bf59ddef3a9 100644 --- a/tensorflow/compiler/xla/python/nvidia_gpu_device.h +++ b/tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_NVIDIA_GPU_DEVICE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_NVIDIA_GPU_DEVICE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_NVIDIA_GPU_DEVICE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_NVIDIA_GPU_DEVICE_H_ #include -#include "tensorflow/compiler/xla/python/distributed/client.h" -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/distributed/client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/core/common_runtime/bfc_allocator.h" @@ -59,4 +59,4 @@ StatusOr> GetNvidiaGpuClient( } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_NVIDIA_GPU_DEVICE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_NVIDIA_GPU_DEVICE_H_ diff --git a/tensorflow/compiler/xla/python/local_client.cc b/tensorflow/compiler/xla/pjrt/pjrt_client.cc similarity index 99% rename from tensorflow/compiler/xla/python/local_client.cc rename to tensorflow/compiler/xla/pjrt/pjrt_client.cc index f2acd0d6398..80fd0e0b658 100644 --- a/tensorflow/compiler/xla/python/local_client.cc +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.cc @@ -62,7 +62,7 @@ limitations under the License. // See the comment on LocalDeviceState::AllocationModel for a discussion of the // different allocation semantics on CPU, GPU, and TPU. -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include #include @@ -83,10 +83,10 @@ limitations under the License. 
#include "tensorflow/compiler/xla/executable_run_options.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/python/distributed/protocol.pb.h" -#include "tensorflow/compiler/xla/python/event_pool.h" -#include "tensorflow/compiler/xla/python/local_device_state.h" -#include "tensorflow/compiler/xla/python/tracked_device_buffer.h" +#include "tensorflow/compiler/xla/pjrt/distributed/protocol.pb.h" +#include "tensorflow/compiler/xla/pjrt/event_pool.h" +#include "tensorflow/compiler/xla/pjrt/local_device_state.h" +#include "tensorflow/compiler/xla/pjrt/tracked_device_buffer.h" #include "tensorflow/compiler/xla/service/executable.h" #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" diff --git a/tensorflow/compiler/xla/python/local_client.h b/tensorflow/compiler/xla/pjrt/pjrt_client.h similarity index 99% rename from tensorflow/compiler/xla/python/local_client.h rename to tensorflow/compiler/xla/pjrt/pjrt_client.h index f09e70037d6..775b44c7073 100644 --- a/tensorflow/compiler/xla/python/local_client.h +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_CLIENT_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_CLIENT_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_PJRT_CLIENT_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_PJRT_CLIENT_H_ #include #include @@ -29,8 +29,8 @@ limitations under the License. #include "tensorflow/compiler/xla/client/executable_build_options.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_computation.h" -#include "tensorflow/compiler/xla/python/local_device_state.h" -#include "tensorflow/compiler/xla/python/tracked_device_buffer.h" +#include "tensorflow/compiler/xla/pjrt/local_device_state.h" +#include "tensorflow/compiler/xla/pjrt/tracked_device_buffer.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable_run_options.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" @@ -681,4 +681,4 @@ class PjRtExecutable { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_LOCAL_CLIENT_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_PJRT_CLIENT_H_ diff --git a/tensorflow/compiler/xla/python/semaphore.cc b/tensorflow/compiler/xla/pjrt/semaphore.cc similarity index 97% rename from tensorflow/compiler/xla/python/semaphore.cc rename to tensorflow/compiler/xla/pjrt/semaphore.cc index 5926618bddc..c1df52acc61 100644 --- a/tensorflow/compiler/xla/python/semaphore.cc +++ b/tensorflow/compiler/xla/pjrt/semaphore.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/python/semaphore.h" +#include "tensorflow/compiler/xla/pjrt/semaphore.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/xla/python/semaphore.h b/tensorflow/compiler/xla/pjrt/semaphore.h similarity index 92% rename from tensorflow/compiler/xla/python/semaphore.h rename to tensorflow/compiler/xla/pjrt/semaphore.h index 7d3e9ce6271..45345becf74 100644 --- a/tensorflow/compiler/xla/python/semaphore.h +++ b/tensorflow/compiler/xla/pjrt/semaphore.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_SEMAPHORE_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_SEMAPHORE_H_ #include "absl/synchronization/mutex.h" #include "tensorflow/compiler/xla/types.h" @@ -65,4 +65,4 @@ class Semaphore { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_SEMAPHORE_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_SEMAPHORE_H_ diff --git a/tensorflow/compiler/xla/python/semaphore_test.cc b/tensorflow/compiler/xla/pjrt/semaphore_test.cc similarity index 97% rename from tensorflow/compiler/xla/python/semaphore_test.cc rename to tensorflow/compiler/xla/pjrt/semaphore_test.cc index 5ef59618b8b..56f7e8c9a05 100644 --- a/tensorflow/compiler/xla/python/semaphore_test.cc +++ b/tensorflow/compiler/xla/pjrt/semaphore_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/semaphore.h" +#include "tensorflow/compiler/xla/pjrt/semaphore.h" #include "absl/synchronization/notification.h" #include "tensorflow/compiler/xla/test.h" diff --git a/tensorflow/compiler/xla/python/tracked_device_buffer.cc b/tensorflow/compiler/xla/pjrt/tracked_device_buffer.cc similarity index 98% rename from tensorflow/compiler/xla/python/tracked_device_buffer.cc rename to tensorflow/compiler/xla/pjrt/tracked_device_buffer.cc index 5c6dbbf3289..32ca4e4550c 100644 --- a/tensorflow/compiler/xla/python/tracked_device_buffer.cc +++ b/tensorflow/compiler/xla/pjrt/tracked_device_buffer.cc @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/python/tracked_device_buffer.h" +#include "tensorflow/compiler/xla/pjrt/tracked_device_buffer.h" #include #include #include "absl/synchronization/mutex.h" -#include "tensorflow/compiler/xla/python/local_device_state.h" +#include "tensorflow/compiler/xla/pjrt/local_device_state.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/stream_executor/device_memory.h" diff --git a/tensorflow/compiler/xla/python/tracked_device_buffer.h b/tensorflow/compiler/xla/pjrt/tracked_device_buffer.h similarity index 97% rename from tensorflow/compiler/xla/python/tracked_device_buffer.h rename to tensorflow/compiler/xla/pjrt/tracked_device_buffer.h index 27e7de6e2c2..562cb2f913e 100644 --- a/tensorflow/compiler/xla/python/tracked_device_buffer.h +++ b/tensorflow/compiler/xla/pjrt/tracked_device_buffer.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TRACKED_DEVICE_BUFFER_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_TRACKED_DEVICE_BUFFER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_TRACKED_DEVICE_BUFFER_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_TRACKED_DEVICE_BUFFER_H_ #include #include "absl/container/flat_hash_set.h" -#include "tensorflow/compiler/xla/python/event_pool.h" -#include "tensorflow/compiler/xla/python/local_device_state.h" +#include "tensorflow/compiler/xla/pjrt/event_pool.h" +#include "tensorflow/compiler/xla/pjrt/local_device_state.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/compiler/xla/shape.h" @@ -257,4 +257,4 @@ void WaitForBufferDefinitionEventsOnStream(const TrackedDeviceBuffer& buffer, } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_TRACKED_DEVICE_BUFFER_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_TRACKED_DEVICE_BUFFER_H_ diff --git a/tensorflow/compiler/xla/python/tracked_device_buffer_test.cc b/tensorflow/compiler/xla/pjrt/tracked_device_buffer_test.cc similarity index 98% rename from tensorflow/compiler/xla/python/tracked_device_buffer_test.cc rename to tensorflow/compiler/xla/pjrt/tracked_device_buffer_test.cc index 354176654af..9373b57e7d1 100644 --- a/tensorflow/compiler/xla/python/tracked_device_buffer_test.cc +++ b/tensorflow/compiler/xla/pjrt/tracked_device_buffer_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/python/tracked_device_buffer.h" +#include "tensorflow/compiler/xla/pjrt/tracked_device_buffer.h" #include diff --git a/tensorflow/compiler/xla/python/worker_thread.cc b/tensorflow/compiler/xla/pjrt/worker_thread.cc similarity index 96% rename from tensorflow/compiler/xla/python/worker_thread.cc rename to tensorflow/compiler/xla/pjrt/worker_thread.cc index d3fb02023a5..e8194534aef 100644 --- a/tensorflow/compiler/xla/python/worker_thread.cc +++ b/tensorflow/compiler/xla/pjrt/worker_thread.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/python/worker_thread.h" +#include "tensorflow/compiler/xla/pjrt/worker_thread.h" namespace xla { diff --git a/tensorflow/compiler/xla/python/worker_thread.h b/tensorflow/compiler/xla/pjrt/worker_thread.h similarity index 90% rename from tensorflow/compiler/xla/python/worker_thread.h rename to tensorflow/compiler/xla/pjrt/worker_thread.h index 598f7b1d4ae..4fd2baa4cda 100644 --- a/tensorflow/compiler/xla/python/worker_thread.h +++ b/tensorflow/compiler/xla/pjrt/worker_thread.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_WORKER_THREAD_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_WORKER_THREAD_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PJRT_WORKER_THREAD_H_ +#define TENSORFLOW_COMPILER_XLA_PJRT_WORKER_THREAD_H_ #include #include @@ -51,4 +51,4 @@ class WorkerThread { } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_WORKER_THREAD_H_ +#endif // TENSORFLOW_COMPILER_XLA_PJRT_WORKER_THREAD_H_ diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index 3eb93f9559e..8c6bc84cf8e 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -1,7 +1,5 @@ load("//tensorflow/core/platform:build_config.bzl", "pyx_library") load("//tensorflow/compiler/xla:xla.bzl", "xla_py_test_deps") -load("//tensorflow:tensorflow.bzl", "py_test", "tf_cc_test") -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "pybind_extension") @@ -78,16 +76,6 @@ py_test( ] + xla_py_test_deps(), ) -cc_library( - name = "worker_thread", - srcs = ["worker_thread.cc"], - hdrs = ["worker_thread.h"], - deps = [ - "//tensorflow/core:lib", - "@com_google_absl//absl/synchronization", - ], -) - cc_library( name = "types", srcs = ["types.cc"], @@ -99,7 +87,6 @@ cc_library( features = ["-use_header_modules"], deps = [ ":bfloat16", - ":local_client", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status", @@ -107,6 +94,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/pjrt:pjrt_client", "//tensorflow/core:lib", "//third_party/py/numpy:headers", "@com_google_absl//absl/container:flat_hash_map", @@ -116,148 +104,6 @@ cc_library( ], ) -cc_library( - name = "event_pool", - srcs = ["event_pool.cc"], - hdrs = ["event_pool.h"], - deps = [ - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:types", - "//tensorflow/core:lib", - "//tensorflow/core:stream_executor", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/synchronization", - ], -) - -cc_library( - name = "semaphore", - srcs = ["semaphore.cc"], - hdrs = ["semaphore.h"], - deps = [ - "//tensorflow/compiler/xla:types", - "//tensorflow/core:lib", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/synchronization", - ], -) - -tf_cc_test( - name = "semaphore_test", - srcs = ["semaphore_test.cc"], - deps = [ - ":semaphore", - "//tensorflow/compiler/xla:test", - "//tensorflow/core:lib", - "//tensorflow/core:test_main", - "@com_google_absl//absl/synchronization", - ], -) - -cc_library( - 
name = "tracked_device_buffer", - srcs = ["tracked_device_buffer.cc"], - hdrs = ["tracked_device_buffer.h"], - deps = [ - ":event_pool", - ":local_device_state", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla/service:shaped_buffer", - "//tensorflow/compiler/xla/service:transfer_manager", - "//tensorflow/core:lib", - "//tensorflow/stream_executor:device_memory", - "//tensorflow/stream_executor:device_memory_allocator", - "//tensorflow/stream_executor:event", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/synchronization", - ], -) - -tf_cc_test( - name = "tracked_device_buffer_test", - srcs = ["tracked_device_buffer_test.cc"], - deps = [ - ":tracked_device_buffer", - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/service:cpu_plugin", - "//tensorflow/core:test_main", - "//tensorflow/stream_executor:device_memory", - "//tensorflow/stream_executor:device_memory_allocator", - ], -) - -cc_library( - name = "local_device_state", - srcs = ["local_device_state.cc"], - hdrs = ["local_device_state.h"], - deps = [ - ":event_pool", - ":semaphore", - ":worker_thread", - "//tensorflow/compiler/xla:status", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/core:lib", - "//tensorflow/core:stream_executor", - "//tensorflow/stream_executor:event", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/synchronization", - ], -) - -cc_library( - name = "local_client", - srcs = ["local_client.cc"], - hdrs = ["local_client.h"], - visibility = ["//tensorflow/compiler/xla:friends"], - deps = [ - ":event_pool", - ":local_device_state", - ":tracked_device_buffer", - "//tensorflow/compiler/xla:cpu_function_runtime", - "//tensorflow/compiler/xla:executable_run_options", - "//tensorflow/compiler/xla:literal", - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:status", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto_cc", - "//tensorflow/compiler/xla/client:executable_build_options", - "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/compiler/xla/client:xla_computation", - "//tensorflow/compiler/xla/python/distributed:protocol_proto_cc", - "//tensorflow/compiler/xla/service:computation_placer", - "//tensorflow/compiler/xla/service:executable", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service:maybe_owning_device_memory", - "//tensorflow/compiler/xla/service:shaped_buffer", - "//tensorflow/compiler/xla/service/gpu:gpu_executable_run_options", - "//tensorflow/core:allocator", - "//tensorflow/core:lib", - "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/stream_executor:event", - "//tensorflow/stream_executor:stream", - "//tensorflow/stream_executor/host:host_platform_id", - "//tensorflow/stream_executor/lib", - "@com_google_absl//absl/base", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/container:inlined_vector", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:span", - ], -) - 
cc_library( name = "python_ref_manager", srcs = ["python_ref_manager.cc"], @@ -322,10 +168,10 @@ cc_library( ], features = ["-use_header_modules"], deps = [ - ":local_client", - ":tracked_device_buffer", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/pjrt:pjrt_client", + "//tensorflow/compiler/xla/pjrt:tracked_device_buffer", "//tensorflow/stream_executor:device_memory", "//tensorflow/stream_executor:platform", "//tensorflow/stream_executor/cuda:cuda_platform_id", @@ -340,37 +186,6 @@ cc_library( ], ) -cc_library( - name = "cpu_device", - srcs = ["cpu_device.cc"], - hdrs = ["cpu_device.h"], - deps = [ - ":local_client", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/service:platform_util", - ], -) - -cc_library( - name = "nvidia_gpu_device", - srcs = ["nvidia_gpu_device.cc"], - hdrs = ["nvidia_gpu_device.h"], - copts = if_cuda(["-DNCCL_ENABLED=1"]), - deps = [ - ":local_client", - "//tensorflow/compiler/xla/service/gpu:gpu_executable_run_options", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/client:client_library", - "//tensorflow/compiler/xla/python/distributed:client", - "//tensorflow/compiler/xla/service:platform_util", - "//tensorflow/compiler/xla:util", - "//tensorflow/core/common_runtime:bfc_allocator", - "//tensorflow/core/common_runtime/gpu:gpu_mem_allocator", - "//tensorflow/stream_executor:tf_allocator_adapter", - ] + if_cuda(["@local_config_nccl//:nccl"]), -) - config_setting( name = "enable_gpu", values = {"define": "xla_python_enable_gpu=true"}, @@ -389,11 +204,7 @@ pybind_extension( module_name = "xla_extension", deps = [ ":bfloat16", - ":cpu_device", ":dlpack", - ":local_client", - ":nvidia_gpu_device", - ":tracked_device_buffer", ":python_ref_manager", ":types", "@com_google_absl//absl/base", @@ -423,9 +234,13 @@ pybind_extension( "//tensorflow/compiler/xla/client/lib:self_adjoint_eig", "//tensorflow/compiler/xla/client/lib:sorting", "//tensorflow/compiler/xla/client/lib:svd", - "//tensorflow/compiler/xla/python/distributed", - "//tensorflow/compiler/xla/python/distributed:client", - "//tensorflow/compiler/xla/python/distributed:service", + "//tensorflow/compiler/xla/pjrt:cpu_device", + "//tensorflow/compiler/xla/pjrt:nvidia_gpu_device", + "//tensorflow/compiler/xla/pjrt:pjrt_client", + "//tensorflow/compiler/xla/pjrt:tracked_device_buffer", + "//tensorflow/compiler/xla/pjrt/distributed", + "//tensorflow/compiler/xla/pjrt/distributed:client", + "//tensorflow/compiler/xla/pjrt/distributed:service", "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:custom_call_target_registry", "//tensorflow/compiler/xla/service:hlo", @@ -454,25 +269,3 @@ pybind_extension( "//conditions:default": [], }), ) - -tf_cc_test( - name = "gpu_multistream_test", - srcs = ["gpu_multistream_test.cc"], - tags = [ - # TODO(phawkins): figure out TF test infra such that this only runs under GPU. 
- "no_oss", - "requires-gpu-nvidia", - ], - deps = [ - ":local_client", - ":nvidia_gpu_device", - "//tensorflow/compiler/xla:test", - "//tensorflow/compiler/xla/client:executable_build_options", - "//tensorflow/compiler/xla/client:xla_builder", - "//tensorflow/compiler/xla/service:gpu_plugin", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/core:lib", - "//tensorflow/core:test_main", - "//tensorflow/core/platform:random", - ], -) diff --git a/tensorflow/compiler/xla/python/dlpack.cc b/tensorflow/compiler/xla/python/dlpack.cc index 31f51d70937..d37d480607a 100644 --- a/tensorflow/compiler/xla/python/dlpack.cc +++ b/tensorflow/compiler/xla/python/dlpack.cc @@ -23,7 +23,7 @@ limitations under the License. #include "absl/strings/str_join.h" #include "absl/types/span.h" #include "include/dlpack/dlpack.h" // from @dlpack -#include "tensorflow/compiler/xla/python/tracked_device_buffer.h" +#include "tensorflow/compiler/xla/pjrt/tracked_device_buffer.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/stream_executor/cuda/cuda_platform_id.h" diff --git a/tensorflow/compiler/xla/python/dlpack.h b/tensorflow/compiler/xla/python/dlpack.h index 9d8965ac43d..6766bbe93b1 100644 --- a/tensorflow/compiler/xla/python/dlpack.h +++ b/tensorflow/compiler/xla/python/dlpack.h @@ -17,7 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_PYTHON_DLPACK_H_ #include "pybind11/pybind11.h" -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" namespace xla { diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/BUILD b/tensorflow/compiler/xla/python/tpu_driver/client/BUILD index b5f1a831d4a..c460cc36f08 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/BUILD +++ b/tensorflow/compiler/xla/python/tpu_driver/client/BUILD @@ -19,8 +19,8 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/client:executable_build_options", - "//tensorflow/compiler/xla/python:local_client", - "//tensorflow/compiler/xla/python:semaphore", + "//tensorflow/compiler/xla/pjrt:pjrt_client", + "//tensorflow/compiler/xla/pjrt:semaphore", "//tensorflow/compiler/xla/python/tpu_driver", "//tensorflow/compiler/xla/python/tpu_driver:direct_tpu_driver", "//tensorflow/compiler/xla/python/tpu_driver:grpc_tpu_driver", diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc index fe2cddd75ef..e78f04ff980 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.cc @@ -24,7 +24,7 @@ limitations under the License. #include "absl/time/time.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/literal.h" -#include "tensorflow/compiler/xla/python/semaphore.h" +#include "tensorflow/compiler/xla/pjrt/semaphore.h" #include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/shape_util.h" diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h index f2c792d2a20..4c45df181db 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.h @@ -24,7 +24,7 @@ limitations under the License. 
#include "absl/synchronization/notification.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/client/executable_build_options.h" -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h" #include "tensorflow/compiler/xla/python/tpu_driver/tpu_driver.pb.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py index ef0caff0ae6..6d4482af43f 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py @@ -20,6 +20,9 @@ from __future__ import print_function from absl import logging +# Import xla_client to load shared C++ extensions (just CompileOptions at the +# time of writing). +from tensorflow.compiler.xla.python import xla_client # pylint: disable=unused-import from tensorflow.compiler.xla.python.tpu_driver.client import tpu_client_extension as _tpu_client diff --git a/tensorflow/compiler/xla/python/types.h b/tensorflow/compiler/xla/python/types.h index 4ed4e9cb7f8..673f403d91e 100644 --- a/tensorflow/compiler/xla/python/types.h +++ b/tensorflow/compiler/xla/python/types.h @@ -26,7 +26,7 @@ limitations under the License. #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "tensorflow/compiler/xla/literal.h" -#include "tensorflow/compiler/xla/python/local_client.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/statusor.h" diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc index 206c304abbb..f03595bf677 100644 --- a/tensorflow/compiler/xla/python/xla.cc +++ b/tensorflow/compiler/xla/python/xla.cc @@ -39,14 +39,14 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/pjrt/cpu_device.h" +#include "tensorflow/compiler/xla/pjrt/distributed/client.h" +#include "tensorflow/compiler/xla/pjrt/distributed/distributed.h" +#include "tensorflow/compiler/xla/pjrt/distributed/service.h" +#include "tensorflow/compiler/xla/pjrt/nvidia_gpu_device.h" +#include "tensorflow/compiler/xla/pjrt/pjrt_client.h" #include "tensorflow/compiler/xla/python/bfloat16.h" -#include "tensorflow/compiler/xla/python/cpu_device.h" -#include "tensorflow/compiler/xla/python/distributed/client.h" -#include "tensorflow/compiler/xla/python/distributed/distributed.h" -#include "tensorflow/compiler/xla/python/distributed/service.h" #include "tensorflow/compiler/xla/python/dlpack.h" -#include "tensorflow/compiler/xla/python/local_client.h" -#include "tensorflow/compiler/xla/python/nvidia_gpu_device.h" #include "tensorflow/compiler/xla/python/python_ref_manager.h" #include "tensorflow/compiler/xla/python/types.h" #include "tensorflow/compiler/xla/service/custom_call_target_registry.h" @@ -980,10 +980,17 @@ PYBIND11_MODULE(xla_extension, m) { py::gil_scoped_release gil_release; TF_ASSIGN_OR_RETURN(LocalDeviceState * local_device, device.GetLocalDeviceState()); + Shape shape_with_layout = shape; + ShapeUtil::ForEachMutableSubshape( + &shape_with_layout, [](Shape* subshape, const ShapeIndex&) { + if (!subshape->has_layout()) { + LayoutUtil::SetToDefaultLayout(subshape); + } + }); TF_ASSIGN_OR_RETURN( Literal literal, local_device->client()->TransferFromOutfeedLocal( - shape, local_device->device_ordinal())); + shape_with_layout, local_device->device_ordinal())); literal_shared = std::make_shared(std::move(literal)); } diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 7f09a7e1698..d9cd906939d 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -261,44 +261,6 @@ class ProgramShape(object): """ -class Buffer(object): - """Represents a handle to data owned by XLA. - - The referent is ready for use in executing a local, compiled - Computation. On XLA platforms involving a device (e.g. GPU), this - means the referent is in device memory. - """ - - @staticmethod - def from_pyval(pyval, device=None, backend=None, force_copy=False): - """Copies the `pyval` to a freshly allocated on-device buffer.""" - backend = backend or get_local_backend() - return backend.buffer_from_pyval(pyval, device, force_copy=force_copy) - - # Buffer is not an instantiable type and exists only for its static methods. - # The underlying buffer objects are C++ object with the following - # API: - # def shape(self) -> Shape: - # def device(self) -> int: - # def delete(self): - # def is_deleted(self) -> bool: - # def block_host_until_ready(self): - # """Blocks the calling thread until the buffer is ready on device.""" - # def copy_to_host_async(self): - # """Requests a copy of the buffer to the host. - # - # Does not block waiting for the copy. Values fetched are available via - # `to_py()`; the purpose of `copy_to_host_async` is to prefetch values - # for subsequent `to_py()` calls, especially when requesting many values - # at once. 
- # """ - # def to_py(self): - # """Returns the value of the buffer as a Python tuple tree of ndarrays.""" - # - # TODO(phawkins): remove Buffer and its static methods completely, have - # clients call methods on Backend to create buffers. - - def shape_from_pyval(pyval): """Returns a Shape that describes a tuple-tree of Numpy arrays.""" @@ -311,43 +273,6 @@ def shape_from_pyval(pyval): return convert(pyval) -def transfer_to_infeed(value, device=None): - """Transfers the given value into the XLA infeed queue. - - XLA's infeed queue is a single queue that feeds the "XLA virtual machine" with - a totally ordered stream of values. This is dequeued from XLA computations via - the Infeed() operation. - - Args: - value: the value that the caller would like to enqueue into the XLA infeed - queue - device: the device to infeed the value to. Each device has a distinct infeed - queue. - """ - # TODO(phawkins): support non-default backends. - backend = get_local_backend() - device = device or backend.local_devices()[0] - device.transfer_to_infeed(value) - - -def transfer_from_outfeed(shape, device=None): - """Transfers a literal of the given shape from `device`'s outfeed. - - Args: - shape: The shape of the value to transfer from outfeed. - device: The device from which to transfer the outfeed value. Each device has - a distinct outfeed queue.. - - Returns: - The literal value that is produced from the outfeed queue. - """ - # TODO(phawkins): support non-default backends. - backend = get_local_backend() - device = device or backend.local_devices()[0] - return device.transfer_from_outfeed( - shape.with_major_to_minor_layout_if_absent()) - - DeviceAssignment = _xla.DeviceAssignment DeviceAssignment.__doc__ = """ A DeviceAssignment is a C++ object with the following signature. diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index 62b3fae018a..fbdd9921a40 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -2029,8 +2029,11 @@ def TestFactory(xla_backend, cloud_tpu=False): return tests -def InstantiateTests(globals_dict, backend, test_prefix="", **kw): - for klass in TestFactory(backend, **kw): +def InstantiateTests(globals_dict, backend_fn, test_prefix="", **kw): + # Avoid creating a new backend per test (this causes GPU OOM, and is probably + # inefficient). + backend_fn = functools.lru_cache(maxsize=None)(backend_fn) + for klass in TestFactory(backend_fn, **kw): test = type(test_prefix + klass.__name__, (klass,), {}) # Clean up the qualified names of the tests to not include the test factory. 
test.__qualname__ = test.__name__ diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index aef215e23e8..126b62a8eb2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -460,6 +460,37 @@ cc_library( ], ) +cc_library( + name = "hlo_sharding_util", + srcs = [ + "hlo_sharding_util.cc", + ], + hdrs = [ + "hlo_sharding_util.h", + ], + deps = [ + ":hlo", + "//tensorflow/compiler/xla:array", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/types:optional", + ], +) + +tf_cc_test( + name = "hlo_sharding_util_test", + srcs = [ + "hlo_sharding_util_test.cc", + ], + deps = [ + ":hlo_sharding_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + ], +) + tf_cc_test( name = "dynamic_parameter_binding_test", srcs = ["dynamic_parameter_binding_test.cc"], @@ -2122,6 +2153,51 @@ tf_cc_test( ], ) +cc_library( + name = "conditional_code_motion", + srcs = ["conditional_code_motion.cc"], + hdrs = ["conditional_code_motion.h"], + deps = [ + ":call_graph", + ":call_inliner", + ":hlo", + ":hlo_casting_utils", + ":hlo_dce", + ":hlo_pass", + ":hlo_pass_pipeline", + ":tuple_simplifier", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "conditional_code_motion_test", + srcs = ["conditional_code_motion_test.cc"], + deps = [ + ":conditional_code_motion", + ":hlo", + ":hlo_matchers", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "convolution_group_converter", srcs = ["convolution_group_converter.cc"], @@ -2352,6 +2428,42 @@ tf_cc_test( ], ) +cc_library( + name = "all_gather_decomposer", + srcs = ["all_gather_decomposer.cc"], + hdrs = ["all_gather_decomposer.h"], + deps = [ + ":hlo", + ":hlo_casting_utils", + ":hlo_pass", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:types", + "//tensorflow/core:lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "all_gather_decomposer_test", + srcs = ["all_gather_decomposer_test.cc"], + deps = [ + ":all_gather_decomposer", + ":hlo", + ":hlo_matchers", + ":hlo_parser", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "tuple_simplifier", srcs = ["tuple_simplifier.cc"], @@ -3189,6 +3301,29 @@ tf_cc_test( ], ) 
+cc_library( + name = "memory_space_propagation", + srcs = ["memory_space_propagation.cc"], + hdrs = ["memory_space_propagation.h"], + deps = [ + ":hlo", + ":hlo_dataflow_analysis", + ":hlo_pass", + ], +) + +tf_cc_test( + name = "memory_space_propagation_test", + srcs = ["memory_space_propagation_test.cc"], + deps = [ + ":hlo_parser", + ":memory_space_propagation", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + cc_library( name = "hlo_dce", srcs = ["hlo_dce.cc"], @@ -3742,6 +3877,7 @@ cc_library( "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", "@llvm-project//llvm:core", "@llvm-project//llvm:transform_utils", ], diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer.cc b/tensorflow/compiler/xla/service/all_gather_decomposer.cc new file mode 100644 index 00000000000..ad63218eca8 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer.cc @@ -0,0 +1,154 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/all_gather_decomposer.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/platform/logging.h" + +namespace xla { + +// Creates a computation of x + y. 
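+// For PRED the reduction uses logical OR instead of kAdd, so booleans combine
+// correctly with the zero (false) filler used below. Roughly, for F32 the
+// embedded computation corresponds to this HLO sketch:
+//   add {
+//     x = f32[] parameter(0)
+//     y = f32[] parameter(1)
+//     ROOT add = f32[] add(x, y)
+//   }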
+HloComputation* MakeBinaryAdd(PrimitiveType type, HloModule* module) {
+  HloComputation::Builder sum_b("add");
+  auto x = sum_b.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/0, ShapeUtil::MakeShape(type, {}), "x"));
+  auto y = sum_b.AddInstruction(HloInstruction::CreateParameter(
+      /*parameter_number=*/1, ShapeUtil::MakeShape(type, {}), "y"));
+  if (type == PRED) {
+    sum_b.AddInstruction(HloInstruction::CreateBinary(
+        ShapeUtil::MakeShape(type, {}), HloOpcode::kOr, x, y));
+  } else {
+    sum_b.AddInstruction(HloInstruction::CreateBinary(
+        ShapeUtil::MakeShape(type, {}), HloOpcode::kAdd, x, y));
+  }
+  HloComputation* reduction = module->AddEmbeddedComputation(sum_b.Build());
+  return reduction;
+}
+
+Status DecomposeAllGather(HloAllGatherInstruction* ag, int64 partition_count,
+                          HloComputation* comp) {
+  auto zero = comp->AddInstruction(HloInstruction::CreateConstant(
+      LiteralUtil::Zero(ag->shape().element_type())));
+  zero = comp->AddInstruction(
+      HloInstruction::CreateBroadcast(ag->shape(), zero, {}));
+  auto zero_index = comp->AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::Zero(S32)));
+  std::vector<HloInstruction*> start_indices(ag->shape().rank(), zero_index);
+  auto shard_id_from_subgroup = [&](HloInstruction* replica_or_global_id) {
+    if (ag->replica_groups().empty()) {
+      return replica_or_global_id;
+    }
+    if (ag->replica_groups().size() == 1) {
+      // Whether the group is {0, 1, ..., N - 1}.
+      bool trivial_group = true;
+      for (int64 i = 0; i < ag->replica_groups()[0].replica_ids_size(); ++i) {
+        if (ag->replica_groups()[0].replica_ids(i) != i) {
+          trivial_group = false;
+          break;
+        }
+      }
+      if (trivial_group) {
+        CHECK_EQ(partition_count, ag->replica_groups()[0].replica_ids_size());
+        return replica_or_global_id;
+      }
+    }
+    // Create a table of shard IDs for each replica_or_global_id, then slice it
+    // using replica_or_global_id.
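+    // For example, with replica_groups={{2,1,0,3},{4,6,7,5}} (as in the
+    // subgroup tests) the table is {2,1,0,3,0,3,1,2}; id 6 maps to shard 1
+    // because it is the second element of its group.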
+ std::vector shard_ids(ag->replica_groups().size() * + ag->replica_groups()[0].replica_ids_size()); + for (const auto& group : ag->replica_groups()) { + for (int64 i = 0; i < group.replica_ids_size(); ++i) { + shard_ids[group.replica_ids(i)] = i; + } + } + auto id_table = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(shard_ids))); + auto shard_id = comp->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(S32, {1}), id_table, {replica_or_global_id}, {1})); + shard_id = comp->AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), shard_id)); + return shard_id; + }; + HloInstruction* shard_id; + if (ag->channel_id().has_value()) { + if (ag->use_global_device_ids()) { + auto pid = comp->AddInstruction(HloInstruction::CreatePartitionId()); + auto rid = comp->AddInstruction(HloInstruction::CreateReplicaId()); + auto pcount = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(partition_count))); + auto global_id = comp->AddInstruction(HloInstruction::CreateBinary( + pid->shape(), HloOpcode::kAdd, pid, + comp->AddInstruction(HloInstruction::CreateBinary( + pid->shape(), HloOpcode::kMultiply, rid, pcount)))); + shard_id = shard_id_from_subgroup(global_id); + } else { + TF_RET_CHECK(!ag->replica_groups().empty()); + TF_RET_CHECK(ag->replica_groups()[0].replica_ids_size() == 1); + shard_id = comp->AddInstruction(HloInstruction::CreatePartitionId()); + } + } else { + shard_id = shard_id_from_subgroup( + comp->AddInstruction(HloInstruction::CreateReplicaId())); + } + start_indices[ag->all_gather_dimension()] = + comp->AddInstruction(HloInstruction::CreateBinary( + shard_id->shape(), HloOpcode::kMultiply, shard_id, + comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(ag->operand(0)->shape().dimensions( + ag->all_gather_dimension())))))); + auto dus = comp->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + zero->shape(), zero, ag->mutable_operand(0), start_indices)); + auto ar = comp->AddInstruction(HloInstruction::CreateAllReduce( + dus->shape(), {dus}, + MakeBinaryAdd(dus->shape().element_type(), comp->parent()), + ag->replica_groups(), + /*constrain_layout=*/ag->constrain_layout(), ag->channel_id(), + ag->use_global_device_ids())); + TF_RETURN_IF_ERROR(ag->ReplaceAllUsesWith(ar)); + TF_RETURN_IF_ERROR(comp->RemoveInstructionAndUnusedOperands(ag)); + return Status::OK(); +} + +StatusOr AllGatherDecomposer::Run(HloModule* module) { + bool changed = false; + for (auto comp : module->MakeNonfusionComputations()) { + for (auto hlo : comp->MakeInstructionPostOrder()) { + if (hlo->opcode() != HloOpcode::kAllGather) { + continue; + } + auto ag = Cast(hlo); + if (should_decompose_(*ag)) { + TF_RETURN_IF_ERROR(DecomposeAllGather(ag, partition_count_, comp)); + changed = true; + } + } + } + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer.h b/tensorflow/compiler/xla/service/all_gather_decomposer.h new file mode 100644 index 00000000000..d1983e37383 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer.h @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ + +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// AllGatherDecomposer is a pass which converts unsupported all-gathers into +// dynamic-update-slices and all-reduces. +class AllGatherDecomposer : public HloModulePass { + public: + AllGatherDecomposer( + std::function should_decompose, + int64 partition_count) + : should_decompose_(std::move(should_decompose)), + partition_count_(partition_count) {} + explicit AllGatherDecomposer(int64 partition_count) + : should_decompose_( + [](const HloAllGatherInstruction& ag) { return true; }), + partition_count_(partition_count) {} + absl::string_view name() const override { return "all_gather_decomposer"; } + + // Run AllGatherDecomposer pass on computations in 'module'. + // Returns whether the 'module' was changed. + StatusOr Run(HloModule* module) override; + + private: + std::function should_decompose_; + int64 partition_count_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_ALL_GATHER_DECOMPOSER_H_ diff --git a/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc b/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc new file mode 100644 index 00000000000..ebcd66ffa07 --- /dev/null +++ b/tensorflow/compiler/xla/service/all_gather_decomposer_test.cc @@ -0,0 +1,161 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/all_gather_decomposer.h" + +#include + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +using ::testing::AllOf; +namespace op = xla::testing::opcode_matchers; +using AllGatherDecomposerTest = HloTestBase; + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGather) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={}, dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::ReplicaId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossPartitionAllGather) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={{0}}, channel_id=1, + dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::PartitionId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithTrivialGroup) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), replica_groups={{0,1,2,3}}, + dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), op::Constant(), + op::Multiply(op::ReplicaId(), op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithSubgroups) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), + replica_groups={{2,1,0,3}, {4,6,7,5}}, dimensions={1} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + 
ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + auto id = + AllOf(op::Shape("s32[]"), + op::Reshape(op::DynamicSlice(op::Constant(), op::ReplicaId()))); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), + op::Constant(), op::Multiply(id, op::Constant())))); +} + +TEST_F(AllGatherDecomposerTest, CrossReplicaAllGatherWithSubgroupsGlobalIds) { + const string module_str = R"( +HloModule module + +ENTRY entry { + param0 = f32[10,20] parameter(0) + ROOT ag = f32[10,80] all-gather(param0), + replica_groups={{2,1,0,3}, {4,6,7,5}}, dimensions={1}, channel_id=1, + use_global_device_ids=true +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnUnverifiedModule((module_str))); + AllGatherDecomposer decomposer(/*partition_count=*/4); + TF_ASSERT_OK_AND_ASSIGN(bool changed, decomposer.Run(module.get())); + EXPECT_TRUE(changed); + LOG(ERROR) << module->ToString(); + auto global_id = + op::Add(op::PartitionId(), op::Multiply(op::ReplicaId(), op::Constant())); + auto id = AllOf(op::Shape("s32[]"), + op::Reshape(op::DynamicSlice(op::Constant(), global_id))); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(op::Constant()), op::Parameter(0), + op::Constant(), op::Multiply(id, op::Constant())))); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index abb695fa486..30d764225c2 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( const HloInstruction& hlo, int64 operand_index) { switch (hlo.opcode()) { case HloOpcode::kAbs: + case HloOpcode::kAllGather: case HloOpcode::kAllToAll: case HloOpcode::kBroadcast: case HloOpcode::kClamp: diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 8c76e912011..ce9c8a4ea62 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -91,6 +91,7 @@ CompileOnlyService::CompileAheadOfTime( TF_RETURN_IF_ERROR(options.static_device_assignment().Serialize( execution_options.mutable_device_assignment())); } + execution_options.set_use_spmd_partitioning(options.use_spmd_partitioning()); for (const AotXlaComputationInstance& instance : computations) { TF_RET_CHECK(instance.computation.has_host_program_shape()); *execution_options.mutable_shape_with_output_layout() = diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index cf646159a38..57b24e372e6 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -76,6 +76,7 @@ class AotCompilationOptions { virtual int64 replica_count() const { return 0; } virtual int64 num_cores() const { return 0; } + virtual bool use_spmd_partitioning() const { return false; } // Optional allocator that may be used for allocating temp space on the device // during compilation. 
diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.cc b/tensorflow/compiler/xla/service/conditional_code_motion.cc
new file mode 100644
index 00000000000..eecdcc851e9
--- /dev/null
+++ b/tensorflow/compiler/xla/service/conditional_code_motion.cc
@@ -0,0 +1,483 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/conditional_code_motion.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/str_cat.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/map_util.h"
+#include "tensorflow/compiler/xla/service/call_graph.h"
+#include "tensorflow/compiler/xla/service/call_inliner.h"
+#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_dce.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_instructions.h"
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"
+#include "tensorflow/compiler/xla/service/tuple_simplifier.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/errors.h"
+
+namespace xla {
+
+namespace {
+
+struct ConditionalBoundary {
+  ConditionalBoundary(HloInstruction* op, int64 op_index, HloInstruction* usr)
+      : operand(op), operand_index(op_index), user(usr) {}
+  // `operand` is one of `user`'s operands.
+
+  // Instruction that remains in the conditional but one of its users
+  // is moved out of the conditional.
+  HloInstruction* operand;
+  // operand_index for `operand` in the `user`.
+  int64 operand_index;
+  // Instruction that is moved out of the conditional.
+  HloInstruction* user;
+};
+
+// Visits the instructions of a branch from the root down to its operands,
+// following BFS. An instruction is visited only after all of its users have
+// been visited. Parameters are not visited.
+class BranchVisitor {
+ public:
+  explicit BranchVisitor(const HloComputation* branch_computation) {
+    HloInstruction* root_inst = branch_computation->root_instruction();
+    worklist_.push_back(root_inst);
+    visited_.insert(root_inst);
+    for (auto parameter_inst : branch_computation->parameter_instructions()) {
+      parameter_instructions_.insert(parameter_inst);
+    }
+  }
+  // Get the next instruction to visit.
+  HloInstruction* GetNextInstruction() {
+    if (!worklist_.empty()) {
+      HloInstruction* inst = worklist_.front();
+      worklist_.pop_front();
+      return inst;
+    }
+    return nullptr;
+  }
+
+  // Add operands of one instruction to the worklist for further visits.
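+  // An operand is enqueued only once all of its users have been visited;
+  // operands that are parameter instructions are recorded as boundaries
+  // instead of being visited.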
+  void AddInstructionOperands(HloInstruction* inst) {
+    int64 operand_count = inst->operand_count();
+    for (int i = 0; i < operand_count; i++) {
+      HloInstruction* operand = inst->mutable_operand(i);
+      if (ContainsKey(visited_, operand)) {
+        continue;
+      }
+      bool all_user_visited = std::all_of(
+          operand->users().begin(), operand->users().end(),
+          [&](HloInstruction* user) { return ContainsKey(visited_, user); });
+
+      if (!all_user_visited) {
+        continue;
+      }
+      // Do not visit parameter_instructions.
+      if (ContainsKey(parameter_instructions_, operand)) {
+        // Add the operand and this instruction to the boundaries.
+        boundaries_.emplace_back(operand, i, inst);
+        continue;
+      }
+
+      worklist_.push_back(operand);
+      visited_.insert(operand);
+    }
+  }
+
+  // Add instruction and its users to conditional boundaries.
+  void AddInstructionToBoundary(HloInstruction* inst) {
+    for (auto user : inst->users()) {
+      boundaries_.emplace_back(inst, user->operand_index(inst), user);
+    }
+  }
+
+  // Add the instruction to the set and vector of instructions to hoist.
+  void AddInstructionToHoist(HloInstruction* inst) {
+    instructions_to_hoist_set_.insert(inst);
+    instructions_to_hoist_.emplace_back(inst);
+  }
+
+  // Whether the visitor has a next instruction to visit.
+  bool HasNextInstruction() const { return !worklist_.empty(); }
+
+  // Number of instructions to hoist.
+  int64 HoistInstructionSize() { return instructions_to_hoist_.size(); }
+
+  // Get boundaries of this branch.
+  const std::vector<ConditionalBoundary>& boundaries() const {
+    return boundaries_;
+  }
+
+  // Get instructions to hoist in this branch.
+  const std::vector<HloInstruction*>& instructions_to_hoist() const {
+    return instructions_to_hoist_;
+  }
+
+  // Get hoist instruction set in this branch.
+  const std::unordered_set<HloInstruction*>& instructions_to_hoist_set() const {
+    return instructions_to_hoist_set_;
+  }
+
+ private:
+  // worklist is the deque that contains instructions to be visited.
+  std::deque<HloInstruction*> worklist_;
+
+  // Instructions that have been visited.
+  std::unordered_set<HloInstruction*> visited_;
+
+  // Parameter instructions of the branch.
+  std::unordered_set<HloInstruction*> parameter_instructions_;
+
+  // Boundaries contains the instructions whose operands remain within the
+  // conditional but which can themselves be hoisted out of the conditional.
+  std::vector<ConditionalBoundary> boundaries_;
+
+  // Instructions to hoist.
+  std::unordered_set<HloInstruction*> instructions_to_hoist_set_;
+
+  // Instructions to hoist; the order within this vector is BFS and
+  // an instruction's order will always be after its users.
+  std::vector<HloInstruction*> instructions_to_hoist_;
+};
+
+// Returns true if `instruction` is worth hoisting out.
+bool WorthHoisting(HloInstruction* instruction) {
+  for (const auto* operand : instruction->operands()) {
+    // Only move out instructions whose operands are not shared with other
+    // users, to avoid copying the operand.
+    if (operand->user_count() > 1) {
+      return false;
+    }
+  }
+  switch (instruction->opcode()) {
+    case HloOpcode::kConvert:
+      // If Convert is after AllReduce, it is worth moving the AllReduce out
+      // of the conditional for AR/CRS combining. If Convert follows other ops
+      // such as Dot or Convolution, it is better to keep the convert within
+      // the conditional so that it can be fused with the Dot or Convolution.
+      //
+      // TODO(b/154283721): figure out the scenario when convert can be fused
+      // with AllReduce out of conditional.
+ if (instruction->operand(0)->opcode() == HloOpcode::kAllReduce) { + return true; + } + return false; + case HloOpcode::kAllReduce: + case HloOpcode::kAdd: + case HloOpcode::kConstant: + case HloOpcode::kSubtract: + case HloOpcode::kMultiply: + case HloOpcode::kDivide: + case HloOpcode::kTuple: + case HloOpcode::kGetTupleElement: + return true; + default: + return false; + } +} + +// Compare if the instructions to be visited at each branches are identical. +bool InstructionWithinBranchIdentical( + const std::vector& instructions, bool is_layout_senstive) { + // Identical includes the shape of each operands are equal. + auto eq_operand = [&](const HloInstruction* a, const HloInstruction* b) { + bool eq_operands = is_layout_senstive + ? ShapeUtil::Equal(a->shape(), b->shape()) + : ShapeUtil::Compatible(a->shape(), b->shape()); + return eq_operands; + }; + + auto eq_computations = [](const HloComputation* a, const HloComputation* b) { + return *a == *b; + }; + + if (instructions[0] == nullptr) { + return false; + } + + if (instructions[0]->IsCrossModuleAllReduce()) { + return std::all_of( + instructions.begin(), instructions.end(), + [&](HloInstruction* instruction) { + if (!instruction->IsCrossModuleAllReduce()) { + return false; + } + auto old_channel_id = instruction->channel_id(); + instruction->set_channel_id(instructions[0]->channel_id()); + bool eq_instructions = instructions[0]->Identical( + *instruction, eq_operand, eq_computations, is_layout_senstive); + instruction->set_channel_id(old_channel_id); + return eq_instructions; + }); + } + + return std::all_of(instructions.begin(), instructions.end(), + [&](HloInstruction* instruction) { + return instructions[0]->Identical( + *instruction, eq_operand, eq_computations, + is_layout_senstive); + }); +} + +// Returns if all the visitors/branches has next instruction to visit. +bool HasNextInstruction(const std::vector& visitors) { + bool has_next = true; + for (const auto& visitor : visitors) { + has_next &= visitor.HasNextInstruction(); + } + return has_next; +} + +// Create tuple element as the new root of the branch. The tuple will contain +// the operands that can't move out of conditional but its user will be moved +// out of conditional. +HloInstruction* CreateNewRoot( + const std::vector& boundaries, + const std::unordered_set& instructions_to_hoist_set, + HloComputation* computation) { + std::vector elements; + elements.reserve(boundaries.size()); + for (auto boundary : boundaries) { + if (ContainsKey(instructions_to_hoist_set, boundary.user)) { + elements.push_back(boundary.operand); + } + } + return computation->AddInstruction(HloInstruction::CreateTuple(elements)); +} + +// Copy identical instructions within conditional outside of conditional. +void CopyIdenticalInstructionsOutOfConditional( + const std::vector& instructions_to_hoist, + HloComputation* conditional_parent, + absl::flat_hash_map* + hoisted_instructions) { + int64 instructions_size = instructions_to_hoist.size(); + // Visit the operands before its users and copy it, so that the copied + // user will point to the correct operand. + for (int64 i = instructions_size - 1; i >= 0; i--) { + HloInstruction* old_instruction = instructions_to_hoist[i]; + auto get_new_operand = [&](HloInstruction* old_operand) { + // If the operand can't be found in `instructions_to_hoist`, this + // operand will be in the `boundaries`, GetTupleElement instructions + // will be added later to replace this operand. 
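+      // (Such an operand stays inside the branch; the hoisted user reads it
+      // through a get-tuple-element of the conditional once
+      // CreateGetTupleElementAfterConditional below has run.)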
+ if (!ContainsKey(*hoisted_instructions, old_operand)) { + return old_operand; + } + return FindOrDie(*hoisted_instructions, old_operand); + }; + + absl::InlinedVector new_operands; + absl::c_transform(old_instruction->operands(), + std::back_inserter(new_operands), get_new_operand); + + HloInstruction* new_instruction = conditional_parent->AddInstruction( + old_instruction->CloneWithNewOperands(old_instruction->shape(), + new_operands)); + // Maps the instruction outside of conditional to the instruction + // inside of the conditional. + InsertOrDie(hoisted_instructions, old_instruction, new_instruction); + } +} + +// If there are instructions to hoist, the root of the conditional must be +// moved out. Change the users of the conditional to the hoisted instruction +// of the new root. +Status ChangeConditionalUsers( + HloInstruction* conditional, HloInstruction* old_root, + const absl::flat_hash_map& + hoisted_instructions) { + HloInstruction* new_root = FindOrDie(hoisted_instructions, old_root); + TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWith(new_root)); + return Status::OK(); +} + +// Insert GetTupleElement before the instructions whose operands might still +// be within the conditional. +Status CreateGetTupleElementAfterConditional( + const std::vector& boundaries, + const std::unordered_set& instructions_to_hoist_set, + const absl::flat_hash_map& + hoisted_instructions, + HloInstruction* conditional, HloComputation* computation) { + int boundary_instruction_size = boundaries.size(); + + // Inserts GetTupleElement before the boundary instructions. + for (int i = 0; i < boundary_instruction_size; i++) { + HloInstruction* gte = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + boundaries[i].operand->shape(), conditional, i)); + + HloInstruction* new_instruction = + FindOrDie(hoisted_instructions, boundaries[i].user); + TF_RETURN_IF_ERROR( + new_instruction->ReplaceOperandWith(boundaries[i].operand_index, gte)); + } + return Status::OK(); +} + +// Remove instructions to be hoisted out of the branch computation. +Status RemoveInstructionFromComputation( + const std::vector& instructions_to_hoist, + HloComputation* branch) { + // Will visit the instructions after its users. + for (auto* instruction : instructions_to_hoist) { + TF_RETURN_IF_ERROR(branch->RemoveInstruction(instruction)); + } + return Status::OK(); +} + +// Hoist identical ops out of the conditional. The definition of identical +// are the shape of the operands are identical and their properties are +// identical. Will start from the root instruction of each branch and get +// the identical ops to hoist. +StatusOr MergeIdenticalElements(HloInstruction* conditional, + bool is_layout_sensitive) { + int branch_count = conditional->branch_count(); + if (branch_count <= 0) { + return false; + } + + std::vector visitors; + visitors.reserve(branch_count); + // Visit instructions from the root instruction to the operands using BFS. + for (int i = 0; i < branch_count; i++) { + visitors.emplace_back(BranchVisitor(conditional->branch_computation(i))); + } + + // The instructions to be visited within each branch. + std::vector front_instructions(branch_count); + + while (HasNextInstruction(visitors)) { + for (int i = 0; i < branch_count; i++) { + front_instructions[i] = visitors[i].GetNextInstruction(); + } + // If two instructions has the same shape, opcode and its operands has the + // same shape, then this instruction can be moved out of conditional. 
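+    // For example, if every branch ends in add(get-tuple-element, constant)
+    // with matching shapes and constant values, those adds are recorded for
+    // hoisting and their operands are visited next; mismatching instructions
+    // instead become boundaries and stay inside their branches.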
+ if (WorthHoisting(front_instructions[0]) && + InstructionWithinBranchIdentical(front_instructions, + is_layout_sensitive)) { + for (int i = 0; i < branch_count; i++) { + visitors[i].AddInstructionOperands(front_instructions[i]); + visitors[i].AddInstructionToHoist(front_instructions[i]); + } + } else { + for (int i = 0; i < branch_count; i++) { + // If the ops are not identical, these ops and its users will + // be in the boundaries` of the conditional. These ops will be stayed + // within the conditional, but one its only user will be moved out + // of conditional. + visitors[i].AddInstructionToBoundary(front_instructions[i]); + } + } + } + + if (visitors[0].HoistInstructionSize() <= 1) { + return false; + } + + HloInstruction* old_root = + conditional->branch_computation(0)->root_instruction(); + HloComputation* conditional_parent = conditional->parent(); + // Maps instructions in the conditional body to instructions hoisted outside + // the conditional that compute the same value. + absl::flat_hash_map hoisted_instructions; + // Copy identical instructions out of the conditional. + CopyIdenticalInstructionsOutOfConditional(visitors[0].instructions_to_hoist(), + conditional_parent, + &hoisted_instructions); + // If there are instructions to hoist, the root of the conditional must be + // moved out. Change the users of the conditional to the hoisted instruction + // of the new root. + TF_RETURN_IF_ERROR( + ChangeConditionalUsers(conditional, old_root, hoisted_instructions)); + + // Create tuple element within each branch and set it as root. + for (int i = 0; i < branch_count; i++) { + HloInstruction* tuple = CreateNewRoot( + visitors[i].boundaries(), visitors[i].instructions_to_hoist_set(), + conditional->branch_computation(i)); + conditional->branch_computation(i)->set_root_instruction(tuple, true); + } + // Changes conditional instruction shape to the shape of the new root. + *conditional->mutable_shape() = + conditional->branch_computation(0)->root_instruction()->shape(); + + // Insert GetTupleElement before the instructions whose operands might still + // be within the conditional. + TF_RETURN_IF_ERROR(CreateGetTupleElementAfterConditional( + visitors[0].boundaries(), visitors[0].instructions_to_hoist_set(), + hoisted_instructions, conditional, conditional_parent)); + + // Remove hoist instructions from the branches. + for (int i = 0; i < branch_count; i++) { + TF_RETURN_IF_ERROR( + RemoveInstructionFromComputation(visitors[i].instructions_to_hoist(), + conditional->branch_computation(i))); + } + + return true; +} + +} // namespace + +StatusOr ConditionalCodeMotion::Run(HloModule* module) { + bool changed = false; + + // Gather all the conditional ops in our module. We do this ahead of time so + // we don't have to worry about mutating the lists of computations or + // instructions as we iterate. 
+ std::vector conditional_ops; + for (auto* comp : module->MakeComputationPostOrder()) { + for (auto* instr : comp->MakeInstructionPostOrder()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } + } + } + + for (HloInstruction* conditional_op : conditional_ops) { + TF_ASSIGN_OR_RETURN(bool result, MergeIdenticalElements( + conditional_op, is_layout_sensitive_)); + changed |= result; + } + + if (changed) { + HloPassPipeline subpipeline("after_conditional_code_motion"); + subpipeline.AddPass(); + subpipeline.AddPass(); + TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); + changed |= cleanup_changed; + } + + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.h b/tensorflow/compiler/xla/service/conditional_code_motion.h new file mode 100644 index 00000000000..1197a8b3620 --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_code_motion.h @@ -0,0 +1,49 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CODE_MOTION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CODE_MOTION_H_ + +#include "absl/strings/string_view.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// HLO pass that moves identical ops out of conditional. +// - The definition of identical are the shape of the operands are identical +// and their properties are identical. +// - Currently, only some types of instructions is supported. +// TODO(b/154283721): relax non-sharable operand constraint and avoid copies in +// the new root. +// - Only the identical ops that won't share operands with other ops will +// be moved out of conditional. +class ConditionalCodeMotion : public HloModulePass { + public: + // If is_layout_sensitive is true, then the hoist process preserves layout + // during identical comparison. Otherwise, layout is ignored. + explicit ConditionalCodeMotion(bool is_layout_sensitive = true) + : is_layout_sensitive_(is_layout_sensitive) {} + absl::string_view name() const override { return "conditional-code-motion"; } + StatusOr Run(HloModule* module) override; + + private: + const bool is_layout_sensitive_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CODE_MOTION_H_ diff --git a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc new file mode 100644 index 00000000000..4a52303a42a --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc @@ -0,0 +1,413 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_code_motion.h" + +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +using ConditionalCodeMotionTest = HloTestBase; +namespace op = xla::testing::opcode_matchers; + +TEST_F(ConditionalCodeMotionTest, DoNotMoveConvertOut) { + absl::string_view hlo_string = + R"( +HloModule RemoveDotOpOut + +on_true { + %arg_tuple.1 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.1 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.1), index=0 + %reshape.8493 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.1) + %convert.2894 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %reshape.8493) + ROOT %tuple.1 = ( bf16[2,512,364]{2,1,0}) tuple(%convert.2894) +} + +on_false { + %arg_tuple.2 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.3 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.2), index=0 + %reshape.9717 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.3) + %convert.3604 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %reshape.9717), metadata={op_type="Cast" op_name="gradients/Cast_125_grad/Cast"} + ROOT %tuple.2 = (bf16[2,512,364]{2,1,0}) tuple(%convert.3604) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.11 = (f32[93184,4]{1,0}) parameter(1) + arg_tuple.22 = (f32[93184,4]{1,0}) parameter(2) + conditional = (bf16[2,512,364]{2,1,0}) conditional(pred.1, arg_tuple.11, arg_tuple.22), true_computation=on_true, false_computation=on_false + get-first-index = bf16[2,512,364]{2,1,0} get-tuple-element(conditional), index=0 + ROOT result = (bf16[2,512,364]{2,1,0}) tuple(get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_FALSE(pass.Run(&*module).ValueOrDie()); +} + +TEST_F(ConditionalCodeMotionTest, UserShareOperandCannotBeMoved) { + absl::string_view hlo_string = + R"( +HloModule RemoveIdenticalInstruction + +on_true { + arg_tuple.1 = (f32[]) parameter(0) + get-tuple-element.1 = f32[] get-tuple-element(arg_tuple.1), index=0 + constant.1 = f32[] constant(1) + constant.2 = f32[] constant(2) + constant.3 = f32[] constant(3) + constant.4 = f32[] constant(4) + constant.5 = f32[] constant(5) + add.1 = f32[] 
add(get-tuple-element.1, constant.1) + add.2 = f32[] add(add.1, constant.2) + add.3 = f32[] add(add.1, constant.3) + add.4 = f32[] add(add.3, constant.5) + multiply.1 = f32[] multiply(add.2, constant.4) + ROOT tuple.6 = (f32[], f32[]) tuple(multiply.1, add.4) +} + +on_false { + arg_tuple.2 = (f32[]) parameter(0) + get-tuple-element.2 = f32[] get-tuple-element(arg_tuple.2), index=0 + constant.6 = f32[] constant(1) + constant.7 = f32[] constant(2) + constant.8 = f32[] constant(3) + constant.9 = f32[] constant(4) + constant.10 = f32[] constant(5) + add.4 = f32[] add(get-tuple-element.2, constant.6) + sub.1 = f32[] subtract(add.4, constant.7) + add.5 = f32[] add(add.4, constant.8) + add.6 = f32[] add(add.5, constant.10) + multiply.2 = f32[] multiply(sub.1, constant.9) + ROOT tuple.6 = (f32[], f32[]) tuple(multiply.2, add.6) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + tuple.1 = (f32[]) parameter(1) + tuple.2 = (f32[]) parameter(2) + conditional = (f32[], f32[]) + conditional(pred.1, tuple.1, tuple.2), true_computation=on_true, + false_computation=on_false + get-first-index = f32[] get-tuple-element(conditional), index=0 + get-second-index = f32[] get-tuple-element(conditional), index=1 + ROOT result = (f32[], f32[]) tuple(get-first-index, get-second-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 9); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 9); + + // Check only one add and multiply is moved out. 
+ HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple( + op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); +} + +TEST_F(ConditionalCodeMotionTest, ConditionalRootElementChanged) { + absl::string_view hlo_string = + R"( +HloModule RemoveIdenticalInstruction + +on_true { + arg_tuple.1 = (f32[]) parameter(0) + get-tuple-element.1 = f32[] get-tuple-element(arg_tuple.1), index=0 + constant.1 = f32[] constant(1) + constant.2 = f32[] constant(2) + add.1 = f32[] add(get-tuple-element.1, constant.1) + add.2 = f32[] add(get-tuple-element.1, constant.2) + add.3 = f32[] add(add.1, add.2) + ROOT tuple.3 = (f32[]) tuple(add.3) +} + +on_false { + arg_tuple.2 = (f32[]) parameter(0) + get-tuple-element.2 = f32[] get-tuple-element(arg_tuple.2), index=0 + constant.3 = f32[] constant(1) + constant.4 = f32[] constant(2) + add.4 = f32[] add(get-tuple-element.2, constant.3) + add.5 = f32[] add(get-tuple-element.2, constant.4) + add.6 = f32[] add(add.4, add.5) + ROOT tuple.4 = (f32[]) tuple(add.6) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + tuple.1 = (f32[]) parameter(1) + tuple.2 = (f32[]) parameter(2) + conditional = (f32[]) + conditional(pred.1, tuple.1, tuple.2), true_computation=on_true, + false_computation=on_false + get-first-index = f32[] get-tuple-element(conditional), index=0 + ROOT result = (f32[]) tuple(get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 7); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 7); + + // add.3 in on_true will be moved out, add.1 and add.2 will be in condtional + // root. 
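+  // The conditional now returns a (f32[], f32[]) tuple holding add.1 and
+  // add.2, and add.3 is rebuilt outside from two get-tuple-elements.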
+ ASSERT_TRUE(ShapeUtil::Compatible( + conditional->shape(), + ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})}))); +} + +TEST_F(ConditionalCodeMotionTest, ConditionalIsRootInstruction) { + absl::string_view hlo_string = + R"( +HloModule RemoveIdenticalInstruction + +on_true { + arg_tuple.1 = (f32[]) parameter(0) + get-tuple-element.1 = f32[] get-tuple-element(arg_tuple.1), index=0 + constant.1 = f32[] constant(1) + constant.2 = f32[] constant(2) + constant.3 = f32[] constant(3) + constant.4 = f32[] constant(4) + constant.5 = f32[] constant(5) + add.1 = f32[] add(get-tuple-element.1, constant.1) + add.2 = f32[] add(add.1, constant.2) + add.3 = f32[] add(add.1, constant.3) + add.4 = f32[] add(add.3, constant.5) + multiply.1 = f32[] multiply(add.2, constant.4) + ROOT tuple.6 = (f32[], f32[]) tuple(multiply.1, add.4) +} + +on_false { + arg_tuple.2 = (f32[]) parameter(0) + get-tuple-element.2 = f32[] get-tuple-element(arg_tuple.2), index=0 + constant.6 = f32[] constant(1) + constant.7 = f32[] constant(2) + constant.8 = f32[] constant(3) + constant.9 = f32[] constant(4) + constant.10 = f32[] constant(5) + add.4 = f32[] add(get-tuple-element.2, constant.6) + sub.1 = f32[] subtract(add.4, constant.7) + add.5 = f32[] add(add.4, constant.8) + add.6 = f32[] add(add.5, constant.10) + multiply.2 = f32[] multiply(sub.1, constant.9) + ROOT tuple.6 = (f32[], f32[]) tuple(multiply.2, add.6) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + tuple.1 = (f32[]) parameter(1) + tuple.2 = (f32[]) parameter(2) + ROOT conditional = (f32[], f32[]) + conditional(pred.1, tuple.1, tuple.2), true_computation=on_true, + false_computation=on_false +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 9); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 9); + + // Check only one add and multiply is moved out. + // add.3 and add.5 can't be moved out because they share operands with + // other instructions. 
+ HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple( + op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); +} + +TEST_F(ConditionalCodeMotionTest, LayoutMisMatchCannotMovedOut) { + absl::string_view hlo_string = + R"( +HloModule LayoutMisMatchCannotMovedOut + +%add.64 (x.139: bf16[], y.139: bf16[]) -> bf16[] { + %x.139 = bf16[]{:T(512)} parameter(0) + %y.139 = bf16[]{:T(512)} parameter(1) + ROOT %add.44073 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.139, bf16[]{:T(512)} %y.139) +} + +%add.181 (x.256: bf16[], y.256: bf16[]) -> bf16[] { + %x.256 = bf16[]{:T(512)} parameter(0) + %y.256 = bf16[]{:T(512)} parameter(1) + ROOT %add.44842 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.256, bf16[]{:T(512)} %y.256) +} + +on_true { + %arg_tuple.1 = (bf16[93184,4]{1,0}) parameter(0) + %get-tuple-element.1 = bf16[93184,4]{1,0} get-tuple-element(%arg_tuple.1), index=0 + %all-reduce.1 = bf16[93184,4]{1,0} + all-reduce(bf16[93184,4]{1,0} %get-tuple-element.1), + channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.64 + %convert.2894 = f32[93184,4]{1,0} convert(bf16[93184, 4]{1,0} %all-reduce.1) + ROOT %tuple.1 = (f32[93184,4]{1,0}) tuple(%convert.2894) +} + +on_false { + %arg_tuple.2 = (bf16[93184,4]{1,0}) parameter(0) + %get-tuple-element.3 = bf16[93184,4]{1,0} get-tuple-element(%arg_tuple.2), index=0 + %copy.1 = bf16[93184,4]{0,1} copy(bf16[93184,4]{1,0} %get-tuple-element.3) + %all-reduce.2 = bf16[93184,4]{0, 1} + all-reduce(bf16[93184,4]{0, 1} %copy.1), + channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.181 + %convert.3604 = f32[93184,4]{0,1} convert(bf16[93184,4]{0,1} %all-reduce.2) + ROOT %tuple.2 = (f32[93184,4]{0,1}) tuple(%convert.3604) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.11 = (bf16[93184,4]{1,0}) parameter(1) + arg_tuple.22 = (bf16[93184,4]{1,0}) parameter(2) + conditional = (f32[93184,4]{1,0}) conditional(pred.1, arg_tuple.11, arg_tuple.22), true_computation=on_true, false_computation=on_false + get-first-index = f32[93184,4]{1,0} get-tuple-element(conditional), index=0 + ROOT result = (f32[93184,4]{1,0}) tuple(get-first-index) +} +)"; + + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_FALSE(pass.Run(&*module).ValueOrDie()); +} + +TEST_F(ConditionalCodeMotionTest, MoveCrossModuleAllReduceOut) { + absl::string_view hlo_string = + R"( +HloModule RemoveIdenticalInstruction + +%add.64 (x.139: bf16[], y.139: bf16[]) -> bf16[] { + %x.139 = bf16[]{:T(512)} parameter(0) + %y.139 = bf16[]{:T(512)} parameter(1) + ROOT %add.44073 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.139, bf16[]{:T(512)} %y.139) +} + +%add.181 (x.256: bf16[], y.256: bf16[]) -> bf16[] { + %x.256 = bf16[]{:T(512)} parameter(0) + %y.256 = bf16[]{:T(512)} parameter(1) + ROOT %add.44842 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.256, bf16[]{:T(512)} %y.256) +} + +on_true { + arg_tuple.1 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(0) + get-tuple-element.11 = bf16[2,54,168,128] get-tuple-element(arg_tuple.1), index=0 + get-tuple-element.12 = bf16[2,52,168,128] get-tuple-element(arg_tuple.1), index=1 + convolution.1 = bf16[3,3,128,128] convolution(bf16[2,54,168,128] + get-tuple-element.11, bf16[2,52,168,128] + get-tuple-element.12), window={size=52x168 pad=0_0x1_1}, + dim_labels=f01b_i01o->01bf + all-reduce.1 = bf16[3,3,128,128] + 
all-reduce(bf16[3,3,128,128] %convolution.1), + channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.64, metadata={op_type="Conv2DBackpropFilter" + op_name="gradients/resnet50/conv2d_22/Conv2D_grad/Conv2DBackpropFilter"} + convert.1 = f32[3,3,128,128] convert(bf16[3,3,128,128] %all-reduce.1), + metadata={op_type="Cast" op_name="Cast_15"} + ROOT tuple.1 = (f32[3,3,128,128]) tuple(convert.1) +} + +on_false { + arg_tuple.2 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(0) + get-tuple-element.21 = bf16[2,86,104,128] + get-tuple-element(arg_tuple.2), index=0 + get-tuple-element.22 = bf16[2,84,104,128] + get-tuple-element(arg_tuple.2), index=1 + convolution.2 = bf16[3,3,128,128] + convolution(bf16[2,86,104,128] get-tuple-element.21, bf16[2,84,104,128] + get-tuple-element.22), window={size=84x104 pad=0_0x1_1}, + dim_labels=f01b_i01o->01bf + all-reduce.2 = bf16[3,3,128,128] + all-reduce(bf16[3,3,128,128] %convolution.2), + channel_id=485, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.181, metadata={op_type="Conv2DBackpropFilter" + op_name="gradients/resnet50/conv2d_22/Conv2D_grad/Conv2DBackpropFilter"} + convert.2 = f32[3,3,128,128] + convert(bf16[3,3,128,128] %all-reduce.2), + metadata={op_type="Cast" op_name="Cast_15"} + ROOT tuple.2 = (f32[3,3,128,128]) tuple(convert.2) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.3 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(1) + arg_tuple.4 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(2) + conditional = (f32[3,3,128,128]) + conditional(pred.1, arg_tuple.3, arg_tuple.4), true_computation=on_true, + false_computation=on_false + get-first-index = f32[3,3,128,128] + get-tuple-element(conditional), index=0 + ROOT result = (f32[3,3,128,128]) tuple(get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass; + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 5); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 5); + + // Checks if conditional shape has changed. 
+ ASSERT_TRUE(ShapeUtil::Compatible( + conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape( + BF16, {3, 3, 128, 128})}))); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Tuple(op::Convert(op::AllReduce( + op::GetTupleElement(op::Conditional())))))); +} + +} // namespace + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index e8e1f044704..2f432cd9356 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -35,6 +35,7 @@ filegroup( srcs = [ "runtime_fp16.cc", "runtime_key_value_sort.cc", + "runtime_pow.cc", "runtime_single_threaded_conv2d.cc", "runtime_single_threaded_fft.cc", "runtime_single_threaded_matmul.cc", @@ -49,6 +50,7 @@ filegroup( "runtime_fft_impl.h", "runtime_fp16.h", "runtime_key_value_sort.h", + "runtime_pow.h", "runtime_single_threaded_conv2d.h", "runtime_single_threaded_fft.h", "runtime_single_threaded_matmul.h", @@ -144,6 +146,7 @@ cc_library( "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:convolution_group_converter", "//tensorflow/compiler/xla/service:dot_decomposer", + "//tensorflow/compiler/xla/service:dynamic_padder", "//tensorflow/compiler/xla/service:dynamic_index_splitter", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", @@ -204,6 +207,7 @@ cc_library( ":cpu_runtime", ":orc_jit_memory_mapper", ":runtime_fp16", + ":runtime_pow", ":runtime_conv2d", ":runtime_conv2d_mkl", ":runtime_fft", @@ -250,6 +254,21 @@ cc_library( ], ) +cc_library( + name = "runtime_pow", + srcs = [ + "runtime_pow.cc", + ], + hdrs = [ + "runtime_pow.h", + ], + copts = runtime_copts(), + deps = [ + "//tensorflow/core/platform:macros", + "//tensorflow/core/platform:types", + ], +) + cc_library( name = "cpu_executable", srcs = ["cpu_executable.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc index 5e536d362d9..a21ace0d8b2 100644 --- a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc +++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc @@ -198,11 +198,6 @@ void CompilerFunctor::AddTargetInfoPasses( target_library_info_impl->addVectorizableFunctions( VectorFunctionsForTargetLibraryInfoImpl()); - // TODO(b/136651482): Disable pow(f) so LLVM doesn't transform it into powi. - // It would be better to provide our own powi. - target_library_info_impl->setUnavailable(llvm::LibFunc_pow); - target_library_info_impl->setUnavailable(llvm::LibFunc_powf); - passes->add( new llvm::TargetLibraryInfoWrapperPass(*target_library_info_impl)); passes->add(createTargetTransformInfoWrapperPass( diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index b04237138e8..fe769bbdd2a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -72,6 +72,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/dot_decomposer.h" #include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/service/dynamic_index_splitter.h" +#include "tensorflow/compiler/xla/service/dynamic_padder.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -239,7 +240,6 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( HloPassPipeline pipeline("HLO passes through layout assignment"); pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - // Expand random number generation. pipeline.AddPass(); pipeline.AddPass(RandomAlgorithm::RNG_PHILOX); @@ -273,6 +273,13 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( pipeline.AddPass( cost_model, /*convert_batch_groups_only=*/false); + pipeline.AddPass(); + pipeline.AddPass( + /*rewrite_training_op=*/true, + /*rewrite_inference_op=*/true, + /*rewrite_grad_op=*/true); + pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(target_machine_features); { auto& pass = @@ -281,12 +288,6 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( /*allow_mixed_precision=*/false); pass.AddPass(); - pass.AddPass(); - pass.AddPass( - /*rewrite_training_op=*/true, - /*rewrite_inference_op=*/true, - /*rewrite_grad_op=*/true); - pipeline.AddPass(); AlgebraicSimplifierOptions options; options.set_enable_dot_strength_reduction(false); pass.AddPass(options); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 8c1ae0179c0..f031daecb1f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -363,7 +363,12 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( if (shape.IsOpaque()) { return sizeof(void*); } - return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); + if (shape.is_static() || shape.IsTuple()) { + return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); + } + // Each dynamic dimension size is represented as a S32. 
+ int64 metadata_size = sizeof(int32) * shape.dimensions_size(); + return ShapeUtil::ByteSizeOf(shape, sizeof(void*)) + metadata_size; } const InstructionValueSet& CpuExecutable::GetRootValueSet() const { diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc index e21ca01c803..05364a4492b 100644 --- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.cc @@ -109,24 +109,6 @@ llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator( const HloInstruction* hlo, const HloToElementGeneratorMap& operand_to_generator) { switch (hlo->opcode()) { - case HloOpcode::kMap: - return [this, hlo, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - std::vector operands; - for (int i = 0; i < hlo->operand_count(); i++) { - TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, - operand_to_generator.at(hlo->operand(i))(index)); - operands.push_back(operand_value); - } - return ir_emitter_->EmitElementalMap(*Cast(hlo), - operands, llvm_ir::IrName(hlo)); - }; - case HloOpcode::kReduceWindow: - return [this, hlo, &operand_to_generator](const IrArray::Index& index) { - return ir_emitter_->EmitElementalReduceWindow( - Cast(hlo), - operand_to_generator.at(hlo->operand(0)), index); - }; case HloOpcode::kConvolution: return [this, hlo, &operand_to_generator](const IrArray::Index& index) { return ir_emitter_->EmitElementalConvolution( @@ -134,22 +116,6 @@ llvm_ir::ElementGenerator CpuElementalIrEmitter::MakeElementGenerator( operand_to_generator.at(hlo->operand(0)), operand_to_generator.at(hlo->operand(1)), index); }; - case HloOpcode::kReduce: - return [this, hlo, &operand_to_generator](const IrArray::Index& index) { - auto reduce_instr = Cast(hlo); - std::vector input_generators; - for (const HloInstruction* instr : reduce_instr->inputs()) { - input_generators.push_back(operand_to_generator.at(instr)); - } - - std::vector initial_value_generators; - for (const HloInstruction* instr : reduce_instr->init_values()) { - initial_value_generators.push_back(operand_to_generator.at(instr)); - } - return ir_emitter_->EmitElementalReduce( - reduce_instr, std::move(input_generators), - std::move(initial_value_generators), index); - }; default: return ElementalIrEmitter::MakeElementGenerator(hlo, operand_to_generator); diff --git a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h index e3fba9306b7..5c9f6677ab3 100644 --- a/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/elemental_ir_emitter.h @@ -44,6 +44,12 @@ class CpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitTanh(PrimitiveType prim_type, llvm::Value* value) override; + StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view name) override { + return ir_emitter_->EmitThreadLocalCall(callee, parameters, name); + } + IrEmitter* ir_emitter_; }; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index c19fa779b60..5a4c6250293 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include #include + #include #include #include @@ -570,25 +571,9 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) { TF_RETURN_IF_ERROR(EmitTargetAddressForOp(sort)); Shape keys_shape = sort->keys()->shape(); PrimitiveType keys_type = keys_shape.element_type(); - switch (keys_type) { - case PRED: - case S8: - case U8: - case S16: - case U16: - case BF16: - case F16: - case S32: - case U32: - case F32: - case S64: - case U64: - case F64: - break; - default: - return Unimplemented( - "Element type %s not supported in the Sort op on CPU.", - PrimitiveType_Name(keys_type)); + if (!primitive_util::IsArrayType(keys_type)) { + return Unimplemented("Element type %s not supported in the Sort op on CPU.", + PrimitiveType_Name(keys_type)); } std::vector destination_addresses(sort->operand_count()); for (int64 i = 0; i < sort->operand_count(); ++i) { @@ -695,101 +680,6 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { return Status::OK(); } -llvm::Value* IrEmitter::EmitElementalMap( - const HloMapInstruction& map_instr, - absl::Span elemental_operands, absl::string_view name) { - return EmitScalarReturningThreadLocalCall(*map_instr.to_apply(), - elemental_operands, name); -} - -StatusOr IrEmitter::EmitElementalReduceWindow( - const HloReduceWindowInstruction* reduce_window, - const llvm_ir::ElementGenerator& input_generator, - const llvm_ir::IrArray::Index& index) { - const HloInstruction* operand = reduce_window->operand(0); - const Window& window = reduce_window->window(); - - // We fold inputs into the accumulator and initialize it to - // the initial value on the reduce_window. - PrimitiveType operand_element_type = operand->shape().element_type(); - llvm::Value* accumulator_address = llvm_ir::EmitAllocaAtFunctionEntry( - llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), - "reduce_window_accumulator_address", &b_, - MinimumAlignmentForPrimitiveType(operand_element_type)); - Store(Load(GetEmittedValueFor(reduce_window->operand(1))), - accumulator_address); - - llvm_ir::ForLoopNest loops(IrName(reduce_window, "inner"), &b_); - std::vector window_size; - for (const auto& dim : window.dimensions()) { - window_size.push_back(dim.size()); - } - const llvm_ir::IrArray::Index window_index = loops.AddLoopsForShape( - ShapeUtil::MakeShape(operand_element_type, window_size), "window"); - CHECK_EQ(window_index.size(), index.size()); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &b_); - - std::vector input_multi_index(index.size()); - llvm::Value* in_bounds_condition = nullptr; - for (size_t i = 0; i < index.size(); ++i) { - llvm::Value* strided_index = - NSWMul(index[i], b_.getInt64(window.dimensions(i).stride())); - input_multi_index[i] = NSWSub( - NSWAdd(strided_index, - NSWMul(window_index[i], - b_.getInt64(window.dimensions(i).window_dilation()))), - b_.getInt64(window.dimensions(i).padding_low())); - - // We need to verify that we are not in the dilated base area. - llvm::Value* dilation_condition = - ICmpEQ(SRem(input_multi_index[i], - b_.getInt64(window.dimensions(i).base_dilation())), - b_.getInt64(0)); - if (in_bounds_condition == nullptr) { - in_bounds_condition = dilation_condition; - } else { - in_bounds_condition = And(in_bounds_condition, dilation_condition); - } - - // Apply base dilation to the index. - input_multi_index[i] = - SDiv(input_multi_index[i], - b_.getInt64(window.dimensions(i).base_dilation())); - - // We need to check if 0 <= input_multi_index[i] < bound, as otherwise we - // are in the padding so that we can skip the computation. 
That is - // equivalent to input_multi_index[i] < bound as an *unsigned* comparison, - // since a negative value will wrap to a large positive value. - llvm::Value* index_condition = - ICmpULT(input_multi_index[i], - b_.getInt64(ShapeUtil::GetDimension(operand->shape(), i))); - if (in_bounds_condition == nullptr) { - in_bounds_condition = index_condition; - } else { - in_bounds_condition = And(in_bounds_condition, index_condition); - } - } - CHECK(in_bounds_condition != nullptr); - - llvm_ir::LlvmIfData if_data = - llvm_ir::EmitIfThenElse(in_bounds_condition, "in-bounds", &b_); - SetToFirstInsertPoint(if_data.true_block, &b_); - - // We are not in the padding, so carry out the computation. - llvm_ir::IrArray::Index input_index(input_multi_index, operand->shape(), - b_.getInt64Ty()); - TF_ASSIGN_OR_RETURN(llvm::Value* const input_value, - input_generator(input_index)); - llvm::Value* result = EmitScalarReturningThreadLocalCall( - *reduce_window->to_apply(), {Load(accumulator_address), input_value}, - "reducer_function"); - Store(result, accumulator_address); - - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &b_); - return Load(accumulator_address); -} - Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) { // Pseudo code for reduce window: // @@ -2099,108 +1989,6 @@ StatusOr IrEmitter::EmitVectorizedReduce( return true; } -StatusOr IrEmitter::EmitElementalReduce( - const HloReduceInstruction* reduce, - std::vector input_generators, - std::vector initial_value_generators, - const llvm_ir::IrArray::Index& index) { - const Shape& out_shape = reduce->shape(); - bool is_variadic = !out_shape.IsArray(); - int accumulators_count = 1; - if (is_variadic) { - CHECK(out_shape.IsTuple()); - accumulators_count = out_shape.tuple_shapes_size(); - } - - absl::Span reduced_dimensions(reduce->dimensions()); - - std::vector accumulator_addrs; - std::vector accumulator_types; - for (int i = 0; i < accumulators_count; i++) { - const Shape& element_shape = - is_variadic ? out_shape.tuple_shapes(i) : out_shape; - PrimitiveType accumulator_type = element_shape.element_type(); - llvm::Type* accumulator_llvm_type = - llvm_ir::PrimitiveTypeToIrType(accumulator_type, module_); - accumulator_types.push_back(accumulator_llvm_type); - - // Initialize an accumulator with init_value. - llvm::AllocaInst* accumulator_addr = llvm_ir::EmitAllocaAtFunctionEntry( - accumulator_llvm_type, "accumulator_" + std::to_string(i), &b_, - MinimumAlignmentForPrimitiveType(accumulator_type)); - TF_ASSIGN_OR_RETURN( - llvm::Value* const init_value, - initial_value_generators[i](llvm_ir::IrArray::Index(index.GetType()))); - Store(init_value, accumulator_addr); - accumulator_addrs.push_back(accumulator_addr); - } - - // The enclosing loops go over all the target elements. Now we have to compute - // the actual target element. For this, we build a new loop nest to iterate - // over all the reduction dimensions in the argument. - // AddLoopsForShapeOnDimensions will return an Index where induction Value*s - // are placed for each dimension in dimensions, and all the rest are nullptrs. - llvm_ir::ForLoopNest loops(IrName(reduce, "inner"), &b_); - const HloInstruction* arg = reduce->operand(0); - std::vector input_multi_index = - loops.AddLoopsForShapeOnDimensions(arg->shape(), reduced_dimensions, - "reduction_dim"); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &b_); - - // Build a full index for the input argument, using input_multi_index as the - // base. 
In input_multi_index only the reduction dimensions are filled in. We - // fill in the rest of the dimensions with induction Value*s taken from - // 'index' which iterates over the target array. See the high-level - // description in the XLA documentation for details. - llvm_ir::IrArray::Index::const_iterator it = index.begin(); - - for (auto& i : input_multi_index) { - if (i == nullptr) { - i = *it++; - } - } - CHECK(index.end() == it); - llvm_ir::IrArray::Index input_index(input_multi_index, arg->shape(), - b_.getInt64Ty()); - - std::vector reduction_operands; - for (llvm::Value* accum : accumulator_addrs) { - llvm::Value* accum_value = Load(accum); - reduction_operands.push_back(accum_value); - } - - for (int i = 0; i < accumulators_count; i++) { - TF_ASSIGN_OR_RETURN(llvm::Value* const input_element, - input_generators[i](input_index)); - reduction_operands.push_back(input_element); - } - - std::vector results = EmitThreadLocalCall( - *reduce->to_apply(), reduction_operands, "reduce_function"); - - CHECK(results.size() == accumulators_count); - for (int i = 0; i < accumulators_count; i++) { - Store(results[i], accumulator_addrs[i]); - } - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &b_); - - if (is_variadic) { - // Emit a structure, as that what the LoopEmitter expects. - llvm::Value* returned_structure = llvm::UndefValue::get( - llvm::StructType::get(b_.getContext(), accumulator_types)); - for (int i = 0; i < accumulators_count; i++) { - llvm::Value* accumulator_value = Load(accumulator_addrs[i]); - returned_structure = - b_.CreateInsertValue(returned_structure, accumulator_value, i); - } - return returned_structure; - } else { - CHECK_EQ(accumulator_addrs.size(), 1); - return Load(accumulator_addrs[0]); - } -} - Status IrEmitter::HandleReduce(HloInstruction* reduce) { auto arg = reduce->mutable_operand(0); auto init_value = reduce->mutable_operand(1); @@ -2554,7 +2342,95 @@ Status IrEmitter::HandleCall(HloInstruction* call) { return Status::OK(); } +Status IrEmitter::HandleSliceToDynamic(HloInstruction* hlo) { + // TODO(jackcao): Generalize this to generic llvm emitter. + TF_RET_CHECK(hlo->shape().rank() == 1); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(hlo)); + for (int64 i = 1; i < hlo->operand_count(); ++i) { + const int64 dim_index = i - 1; + llvm::Value* source_buffer = GetEmittedValueFor(hlo->operand(i)); + llvm::LoadInst* dim_size = b_.CreateLoad(source_buffer, "dim_size"); + llvm::Value* dest_buffer = GetEmittedValueFor(hlo); + llvm::Value* raw_buffer = + b_.CreateBitCast(dest_buffer, b_.getInt8Ty()->getPointerTo()); + + int32 raw_data_size = + ShapeUtil::ByteSizeOf(ShapeUtil::MakeStaticShape(hlo->shape())); + llvm::Value* metadata = b_.CreateConstInBoundsGEP1_32( + b_.getInt8Ty(), raw_buffer, raw_data_size + dim_index * sizeof(int32)); + b_.CreateStore(dim_size, + b_.CreateBitCast(metadata, b_.getInt32Ty()->getPointerTo())); + } + + return EmitTargetElementLoop(hlo, + [=](const llvm_ir::IrArray::Index& dest_index) { + // TODO(jackcao): Properly linearize dest_index + // and delinearize to source index. + return GetIrArrayFor(hlo->operand(0)) + .EmitReadArrayElement(dest_index, &b_); + }); +} + +Status IrEmitter::HandlePadToStatic(HloInstruction* hlo) { + // TODO(jackcao): Generalize this to generic llvm emitter. 
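For orientation, this handler and the SliceToDynamic handler above implement two sides of the same shape contract: PadToStatic maps a dynamic array to a tuple of (static data, one s32 size per dimension), and SliceToDynamic maps static data plus sizes back to a dynamic array. A hedged sketch of building the PadToStatic result shape with the same ShapeUtil helpers used elsewhere in this change (the wrapper function name is invented):

```cpp
// Hedged illustration (invented wrapper) of the custom-call shape contract:
//   PadToStatic:    f32[<=N] -> (f32[N], s32, ...)   one s32 per dimension
//   SliceToDynamic: (f32[N], s32, ...) -> f32[<=N]
#include <vector>

#include "tensorflow/compiler/xla/shape_util.h"

namespace xla {

Shape PadToStaticResultShape(const Shape& dynamic_input) {
  std::vector<Shape> elements = {ShapeUtil::MakeStaticShape(dynamic_input)};
  for (int64 i = 0; i < dynamic_input.rank(); ++i) {
    // Runtime size of dimension i, read from the metadata section.
    elements.push_back(ShapeUtil::MakeScalarShape(S32));
  }
  return ShapeUtil::MakeTupleShape(elements);
}

}  // namespace xla
```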
+ TF_RET_CHECK(hlo->operand(0)->shape().rank() == 1); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(hlo)); + + TF_ASSIGN_OR_RETURN(BufferAllocation::Slice data_slice, + assignment_.GetUniqueSlice(hlo, {0})); + const Shape& data_shape = ShapeUtil::GetSubshape(hlo->shape(), {0}); + llvm::Value* data_address = EmitBufferPointer(data_slice, data_shape); + llvm_ir::IrArray data_array(data_address, data_shape); + TF_RETURN_IF_ERROR(llvm_ir::LoopEmitter( + [=](const llvm_ir::IrArray::Index& dest_index) { + // TODO(jackcao): Properly linearize dest_index and + // delinearize to source index. + return GetIrArrayFor(hlo->operand(0)) + .EmitReadArrayElement(dest_index, &b_); + }, + llvm_ir::IrArray(data_address, data_shape), &b_) + .EmitLoop(IrName(hlo))); + std::vector tuple_operand_ptrs; + tuple_operand_ptrs.push_back(data_array.GetBasePointer()); + + // PadToStatic has a dynamic tensor as input and variadic size of outputs: + // (static_tensor, dynamic_dim_0, dynamic_dim_1, ... ) + // Dynamic dimension sizes starts from output index 1. + for (int64 i = 1; i < hlo->shape().tuple_shapes_size(); ++i) { + // Read from the metadata section of the dynamic input (operand 0). + const Shape& dim_shape = ShapeUtil::GetSubshape(hlo->shape(), {i}); + TF_RET_CHECK(Shape::Equal()(dim_shape, ShapeUtil::MakeScalarShape(S32))); + TF_ASSIGN_OR_RETURN(BufferAllocation::Slice dim_size_slice, + assignment_.GetUniqueSlice(hlo, {i})); + llvm::Value* dest_dim_size_address = + EmitBufferPointer(dim_size_slice, data_shape); + const int64 dim_index = i - 1; + llvm::Value* source_buffer = GetEmittedValueFor(hlo->operand(0)); + llvm::Value* raw_buffer = + b_.CreateBitCast(source_buffer, b_.getInt8Ty()->getPointerTo()); + int32 raw_data_size = ShapeUtil::ByteSizeOf( + ShapeUtil::MakeStaticShape(hlo->operand(0)->shape())); + llvm::Value* metadata = b_.CreateConstInBoundsGEP1_32( + b_.getInt8Ty(), raw_buffer, raw_data_size + dim_index * sizeof(int32)); + llvm::Value* dim_size = b_.CreateLoad( + b_.CreateBitCast(metadata, b_.getInt32Ty()->getPointerTo())); + b_.CreateStore(dim_size, b_.CreateBitCast(dest_dim_size_address, + b_.getInt32Ty()->getPointerTo())); + tuple_operand_ptrs.push_back(dest_dim_size_address); + } + + // Emit static tensor and dynamic sizes as one tuple. + llvm_ir::EmitTuple(GetIrArrayFor(hlo), tuple_operand_ptrs, &b_); + return Status::OK(); +} + Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) { + if (custom_call->custom_call_target() == "PadToStatic") { + return HandlePadToStatic(custom_call); + } + if (custom_call->custom_call_target() == "SliceToDynamic") { + return HandleSliceToDynamic(custom_call); + } absl::Span operands(custom_call->operands()); llvm::Type* i8_ptr_type = b_.getInt8PtrTy(); llvm::AllocaInst* operands_alloca = diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index cc5aa3f37fc..9b0d11e9f3f 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -58,6 +58,8 @@ namespace cpu { // functions. class IrEmitter : public DfsHloVisitorWithDefault, public IrBuilderMixin { + friend class CpuElementalIrEmitter; + public: using GeneratorForOperandIrArrays = std::function()>; @@ -113,28 +115,12 @@ class IrEmitter : public DfsHloVisitorWithDefault, // Emit an LLVM global variable for every constant buffer allocation. Status EmitConstantGlobals(); - // Emit code to map one element according to `map_instr`. 
- llvm::Value* EmitElementalMap( - const HloMapInstruction& map_instr, - absl::Span elemental_operands, - absl::string_view name); - // Emit code to emit the element at `index` for a reduce window instruction. - StatusOr EmitElementalReduceWindow( - const HloReduceWindowInstruction* reduce_window, - const llvm_ir::ElementGenerator& input_generator, - const llvm_ir::IrArray::Index& index); // Emit code to emit the element at `index` for a convolution instruction. StatusOr EmitElementalConvolution( const HloConvolutionInstruction* convolution, const llvm_ir::ElementGenerator& input_generator, const llvm_ir::ElementGenerator& kernel_generator, const llvm_ir::IrArray::Index& index); - // Emit code to emit the element at `index` for a reduce instruction. - StatusOr EmitElementalReduce( - const HloReduceInstruction* reduce, - std::vector input_generators, - std::vector initial_value_generator, - const llvm_ir::IrArray::Index& index); protected: // @@ -197,6 +183,8 @@ class IrEmitter : public DfsHloVisitorWithDefault, } private: + Status HandleSliceToDynamic(HloInstruction* hlo); + Status HandlePadToStatic(HloInstruction* hlo); Status HandleAllReduceSingleReplica(HloInstruction* crs); Status HandleAllReduceMultipleReplica(HloInstruction* crs); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_pow.cc b/tensorflow/compiler/xla/service/cpu/runtime_pow.cc new file mode 100644 index 00000000000..08308b4ce57 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_pow.cc @@ -0,0 +1,39 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/runtime_pow.h" + +#include "tensorflow/core/platform/macros.h" + +template +static T Powi(T a, tensorflow::int32 b) { + const bool recip = b < 0; + T r = 1; + while (true) { + if (b & 1) r *= a; + b /= 2; + if (b == 0) break; + a *= a; + } + return recip ? 1 / r : r; +} + +float TF_ATTRIBUTE_WEAK __powisf2(float a, tensorflow::int32 b) { + return Powi(a, b); +} + +double TF_ATTRIBUTE_WEAK __powidf2(double a, tensorflow::int32 b) { + return Powi(a, b); +} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_pow.h b/tensorflow/compiler/xla/service/cpu/runtime_pow.h new file mode 100644 index 00000000000..53f8094256d --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_pow.h @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_POW_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_POW_H_ + +#include "tensorflow/core/platform/types.h" + +// Raises F32 value a to the power of b. +extern "C" float __powisf2(float a, tensorflow::int32 b); + +// Raises F64 value a to the power of b. +extern "C" double __powidf2(double a, tensorflow::int32 b); + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_POW_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 153bd572eba..395eb31c13f 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_pow.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" @@ -56,9 +57,8 @@ llvm::SmallVector DetectMachineAttributes() { llvm::StringMap host_features; if (llvm::sys::getHostCPUFeatures(host_features)) { for (auto& feature : host_features) { - if (feature.second) { - result.push_back(std::string(feature.first())); - } + result.push_back((feature.second ? '+' : '-') + + std::string(feature.first())); } } return result; @@ -271,6 +271,8 @@ bool RegisterKnownJITSymbols() { "Host"); registry->Register("__truncdfhf2", reinterpret_cast(__truncdfhf2), "Host"); + registry->Register("__powisf2", reinterpret_cast(__powisf2), "Host"); + registry->Register("__powidf2", reinterpret_cast(__powidf2), "Host"); #undef REGISTER_CPU_RUNTIME_SYMBOL diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index e4676141f65..caea9d9095a 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -109,10 +109,14 @@ class DfsHloVisitorBase { virtual Status HandleRsqrt(HloInstructionPtr hlo) { return HandleElementwiseUnary(hlo); } + virtual Status HandleCbrt(HloInstructionPtr hlo) { + return HandleElementwiseUnary(hlo); + } virtual Status HandleConvolution(HloInstructionPtr hlo) = 0; virtual Status HandleFft(HloInstructionPtr fft) = 0; virtual Status HandleTriangularSolve(HloInstructionPtr hlo) = 0; virtual Status HandleCholesky(HloInstructionPtr hlo) = 0; + virtual Status HandleAllGather(HloInstructionPtr hlo) = 0; virtual Status HandleAllReduce(HloInstructionPtr hlo) = 0; virtual Status HandleAllToAll(HloInstructionPtr hlo) = 0; virtual Status HandleCollectivePermute(HloInstructionPtr hlo) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index baa9240fb56..9cd220245ba 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -98,6 +98,9 @@ class DfsHloVisitorWithDefaultBase Status HandleCholesky(HloInstructionPtr hlo) override { return DefaultAction(hlo); } + Status HandleAllGather(HloInstructionPtr crs) override { + return 
DefaultAction(crs); + } Status HandleAllReduce(HloInstructionPtr crs) override { return DefaultAction(crs); } diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index be26f9a50cd..e193df6d9bd 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -1620,6 +1620,24 @@ Status DynamicDimensionInference::ForwardDynamicSize(HloInstruction* inst, return Status::OK(); } +bool DynamicDimensionInference::HasDynamicDimension( + HloInstruction* inst) const { + bool has_dynamic_dim = false; + ShapeUtil::ForEachSubshape( + inst->shape(), [&](const Shape& subshape, const ShapeIndex& index) { + if (subshape.IsTuple()) { + return; + } + for (int64 i = 0; i < subshape.dimensions_size(); ++i) { + HloInstruction* operand_dynamic_size = GetDynamicSize(inst, index, i); + if (operand_dynamic_size != nullptr) { + has_dynamic_dim = true; + } + } + }); + return has_dynamic_dim; +} + HloInstruction* DynamicDimensionInference::GetDynamicSize( HloInstruction* inst, const ShapeIndex& index, int64 dim) const { auto iter = dynamic_mapping_.find(DynamicDimension{inst, index, dim}); diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h index 6e3b9e26feb..417f0289143 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h @@ -51,6 +51,10 @@ class DynamicDimensionInference { HloInstruction* GetDynamicSize(HloInstruction* inst, const ShapeIndex& index, int64 dim) const; + // Returns if current instruction contains any dynamic dimension. Recursively + // go into tuples. + bool HasDynamicDimension(HloInstruction* inst) const; + // Forward dynamic dimension size at `dim` and its constraint from `inst` to // `new_inst`. Status ForwardDynamicSize(HloInstruction* inst, HloInstruction* new_inst, diff --git a/tensorflow/compiler/xla/service/dynamic_padder.cc b/tensorflow/compiler/xla/service/dynamic_padder.cc index 09b15781b32..44fdda0f411 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder.cc @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" namespace xla { @@ -943,106 +944,6 @@ Status InsertPadToStaticAfterModuleInputs(HloModule* module) { return Status::OK(); } -// For all dynamic outputs that live out of the computation, add -// slice-to-dynamic operations. 
-Status InsertSliceToDynamicBeforeModuleOutputs( - const DynamicDimensionInference& dynamic_dimension_inference, - HloModule* module) { - auto root = module->entry_computation()->root_instruction(); - absl::flat_hash_set dynamic_outputs; - ShapeUtil::ForEachSubshape( - root->shape(), [&](const Shape& subshape, const ShapeIndex& index) { - if (subshape.IsArray()) { - bool has_dynamic_output = false; - for (int64 dim = 0; dim < subshape.rank(); ++dim) { - if (dynamic_dimension_inference.GetDynamicSize(root, index, dim) != - nullptr) { - CHECK_LE(index.size(), 1) << "XLA doesn't support nested output " - "dimension that has dynamic size"; - has_dynamic_output = true; - } - } - if (has_dynamic_output) { - dynamic_outputs.insert(index); - } - } - }); - if (!dynamic_outputs.empty()) { - if (root->shape().IsTuple()) { - std::vector new_root_operands; - ShapeUtil::ForEachSubshape(root->shape(), [&](const Shape& subshape, - const ShapeIndex& index) { - if (!subshape.IsArray()) { - return; - } - - auto gte = module->entry_computation()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::MakeShapeWithStaticDimensions(subshape), root, - index[0])); - - if (dynamic_outputs.contains(index)) { - CHECK_EQ(index.size(), 1) - << "XLA only support 1 layer nested output tuple"; - // For dynamic outputs, creates an slice operation. - std::vector slice_operands; - // First operand is the original input. Rest are dimension values. - slice_operands.push_back(gte); - // Keep a dynamic version of the subshape as we are removing the - // dynamic dimension in the original root and gte. - Shape dynamic_subshape = subshape; - for (int64 dim = 0; dim < subshape.rank(); ++dim) { - HloInstruction* dynamic_size = - dynamic_dimension_inference.GetDynamicSize(root, index, dim); - if (dynamic_size != nullptr) { - slice_operands.push_back(dynamic_size); - } else { - auto const_size = HloInstruction::CreateConstant( - LiteralUtil::CreateR0(subshape.dimensions(dim))); - slice_operands.push_back( - module->entry_computation()->AddInstruction( - std::move(const_size))); - } - } - // This is a dynamic output, add slice operation. - auto slice = HloInstruction::CreateCustomCall( - dynamic_subshape, slice_operands, "SliceToDynamic"); - new_root_operands.push_back( - module->entry_computation()->AddInstruction(std::move(slice))); - } else { - new_root_operands.push_back(gte); - } - }); - - auto new_root = module->entry_computation()->AddInstruction( - HloInstruction::CreateTuple(new_root_operands)); - module->entry_computation()->set_root_instruction(new_root); - } else { - std::vector slice_operands; - // First operand is the original input. Rest are dimension values. - slice_operands.push_back(root); - for (int64 dim = 0; dim < root->shape().rank(); ++dim) { - HloInstruction* dynamic_size = - dynamic_dimension_inference.GetDynamicSize(root, {}, dim); - if (dynamic_size != nullptr) { - slice_operands.push_back(dynamic_size); - } else { - auto const_size = HloInstruction::CreateConstant( - LiteralUtil::CreateR0(root->shape().dimensions(dim))); - slice_operands.push_back(module->entry_computation()->AddInstruction( - std::move(const_size))); - } - // This is a dynamic output, add slice operation. 
- auto slice = module->entry_computation()->AddInstruction( - HloInstruction::CreateCustomCall(root->shape(), slice_operands, - "SliceToDynamic", "0-0")); - module->entry_computation()->set_root_instruction(slice); - } - } - } - return Status::OK(); -} - // Remove all dynamic shapes between pad-to-static and slice-to-dynamic. // // After this visitor the entry computation then looks like: @@ -1059,46 +960,217 @@ Status InsertSliceToDynamicBeforeModuleOutputs( // ROOT tuple (dynamic) class DynamicShapeRemovingVisitor : public DfsHloVisitorWithDefault { public: + explicit DynamicShapeRemovingVisitor( + const DynamicPadder::OpSupportsDynamismHandler& + op_supports_dynamism_handler, + const DynamicDimensionInference& dynamic_dimension_inference) + : op_supports_dynamism_handler_(op_supports_dynamism_handler), + dynamic_dimension_inference_(dynamic_dimension_inference) {} + Status DefaultAction(HloInstruction* hlo) override; Status HandleCustomCall(HloInstruction* hlo) override; + Status HandleTuple(HloInstruction* hlo) override; + Status HandleGetTupleElement(HloInstruction* hlo) override; + Status HandleParameter(HloInstruction* hlo) override; - static Status Run(HloComputation* computation) { - DynamicShapeRemovingVisitor visitor; - return computation->Accept(&visitor); + static Status Run(HloComputation* computation, + const DynamicPadder::OpSupportsDynamismHandler& + op_supports_dynamism_handler, + const DynamicDimensionInference& dynamic_shape_inference, + bool require_dynamic_output) { + DynamicShapeRemovingVisitor visitor(op_supports_dynamism_handler, + dynamic_shape_inference); + TF_RETURN_IF_ERROR(computation->Accept(&visitor)); + // If the outputs is required to be dynamic form, insert static to dynamic + // conversion as root. + if (require_dynamic_output) { + HloInstruction* root = computation->root_instruction(); + if (dynamic_shape_inference.HasDynamicDimension(root)) { + HloInstruction* new_root = visitor.ConvertToDynamic(root); + computation->set_root_instruction(new_root); + } + } + return Status::OK(); } + + private: + // If a tensor produced by `inst` is in dynamic form, convert it to static and + // returns the new instruction. + HloInstruction* ConvertToStatic(HloInstruction* inst); + + // If a tensor produced by `inst` is in static form, convert it to dynamic and + // returns the new instruction. + HloInstruction* ConvertToDynamic(HloInstruction* inst); + + const DynamicPadder::OpSupportsDynamismHandler& op_supports_dynamism_handler_; + + const DynamicDimensionInference& dynamic_dimension_inference_; }; +HloInstruction* DynamicShapeRemovingVisitor::ConvertToDynamic( + HloInstruction* inst) { + auto* comp = inst->parent(); + const Shape& shape = inst->shape(); + if (shape.IsTuple()) { + std::vector dynamic_operands; + for (int64 i = 0; i < shape.tuple_shapes_size(); ++i) { + auto operand = inst->mutable_operand(i); + if (dynamic_dimension_inference_.HasDynamicDimension(operand)) { + // Recurse. + dynamic_operands.push_back(ConvertToDynamic(operand)); + } else { + dynamic_operands.push_back(operand); + } + } + return comp->AddInstruction(HloInstruction::CreateTuple(dynamic_operands)); + } else { + // Collect the data input, as well as dimension sizes, and feed them to + // slice to dynamic to create a dynamic tensor. + Shape output_shape = shape; // 0th element. 
+ CHECK(output_shape.is_static()); + std::vector slice_operand; + slice_operand.push_back(inst); + for (int64 i = 0; i < output_shape.dimensions_size(); ++i) { + auto dimension_size = + dynamic_dimension_inference_.GetDynamicSize(inst, {}, i); + if (dimension_size == nullptr) { + dimension_size = comp->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(output_shape.dimensions(i)))); + } else { + output_shape.set_dynamic_dimension(i, true); + } + slice_operand.push_back(dimension_size); + } + return comp->AddInstruction(HloInstruction::CreateCustomCall( + output_shape, slice_operand, "SliceToDynamic")); + } +} + +HloInstruction* DynamicShapeRemovingVisitor::ConvertToStatic( + HloInstruction* inst) { + auto* comp = inst->parent(); + const Shape& shape = inst->shape(); + CHECK(shape.is_dynamic()); + if (shape.IsTuple()) { + std::vector static_operands; + for (int64 i = 0; i < shape.tuple_shapes_size(); ++i) { + auto operand = inst->mutable_operand(i); + if (shape.tuple_shapes(i).is_dynamic()) { + static_operands.push_back(ConvertToStatic(operand)); + } else { + static_operands.push_back(operand); + } + } + return comp->AddInstruction(HloInstruction::CreateTuple(static_operands)); + } else { + // The output shape of pad static is a tuple. The 0th element is the data + // output, which is the same as input shape, but without dynamic dimensions. + // i-th element is the dynamic dimension size for i-1th input dimension. + Shape data_output_shape = shape; // 0th element. + data_output_shape.clear_dynamic_dimensions(); + Shape output_shape = ShapeUtil::MakeTupleShape({data_output_shape}); + for (int64 i = 0; i < shape.rank(); ++i) { + ShapeUtil::AppendShapeToTuple(ShapeUtil::MakeScalarShape(S32), + &output_shape); + } + HloInstruction* pad_to_static = + comp->AddInstruction(HloInstruction::CreateCustomCall( + output_shape, {inst}, "PadToStatic", "")); + HloInstruction* data_output = + comp->AddInstruction(HloInstruction::CreateGetTupleElement( + data_output_shape, pad_to_static, 0)); + return data_output; + } +} + Status DynamicShapeRemovingVisitor::DefaultAction(HloInstruction* hlo) { - // Default rule: If input to an op is static, remove dynamism in output. - bool input_is_dynamic = false; - // Default rule: - for (int64 i = 0; i < hlo->operand_count(); ++i) { - if (!hlo->operand(i)->shape().is_static()) { - input_is_dynamic = true; + const bool input_is_dynamic = absl::c_any_of( + hlo->operands(), + [](const HloInstruction* hlo) { return hlo->shape().is_dynamic(); }); + + // By default, ops don't support dynamic lowering. + OpDynamismSupport op_support = OpDynamismSupport::kNoSupport; + if (op_supports_dynamism_handler_) { + op_support = op_supports_dynamism_handler_(hlo); + } + if (op_support == OpDynamismSupport::kNoSupport) { + for (auto* sub_computation : hlo->called_computations()) { + for (auto* param : sub_computation->parameter_instructions()) { + param->mutable_shape()->clear_dynamic_dimensions(); + } } } - - if (!input_is_dynamic) { + // If the input to an op is static and the op doesn't support + // dynamic output, remove dynamism in output -- dynamic_padder should have + // rewritten it to support static shapes. + if (!input_is_dynamic && op_support == OpDynamismSupport::kNoSupport) { hlo->mutable_shape()->clear_dynamic_dimensions(); + return Status::OK(); } + + // Op doesn't support dynamic tensor: For each operand rewrite dynamic input + // into static input using pad_to_static. 
+ if (input_is_dynamic && op_support == OpDynamismSupport::kNoSupport) { + VLOG(1) << "op doesn't support dynamic tensor: " << hlo->ToString(); + for (int64 i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().is_dynamic()) { + auto static_operand = ConvertToStatic(hlo->mutable_operand(i)); + TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, static_operand)); + } + } + // This op doesn't support dynamic lowering so the op has to be static. + hlo->mutable_shape()->clear_dynamic_dimensions(); + return Status::OK(); + } + + // If the op requires dynamic tensor and input is static -- construct a + // dynamic tensor from the static tensor to feed it. + if (!input_is_dynamic && op_support == OpDynamismSupport::kRequired) { + VLOG(1) << "op doesn't support static tensor: " << hlo->ToString(); + for (int64 i = 0; i < hlo->operand_count(); ++i) { + auto operand = hlo->mutable_operand(i); + if (dynamic_dimension_inference_.HasDynamicDimension(operand)) { + auto dynamic_operand = ConvertToDynamic(hlo->mutable_operand(i)); + TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(i, dynamic_operand)); + } + } + return Status::OK(); + } + return Status::OK(); } -Status DynamicShapeRemovingVisitor::HandleCustomCall(HloInstruction* hlo) { - if (hlo->custom_call_target() == "SliceToDynamic") { - // Don't remove slice-to-dynamic instruction. - return Status::OK(); +Status DynamicShapeRemovingVisitor::HandleGetTupleElement(HloInstruction* hlo) { + *hlo->mutable_shape() = + hlo->operand(0)->shape().tuple_shapes(hlo->tuple_index()); + return Status::OK(); +} + +Status DynamicShapeRemovingVisitor::HandleTuple(HloInstruction* hlo) { + for (int64 i = 0; i < hlo->operand_count(); ++i) { + *hlo->mutable_shape()->mutable_tuple_shapes(i) = hlo->operand(i)->shape(); } - return DefaultAction(hlo); + return Status::OK(); } Status DynamicShapeRemovingVisitor::HandleParameter(HloInstruction* hlo) { return Status::OK(); } +Status DynamicShapeRemovingVisitor::HandleCustomCall(HloInstruction* hlo) { + if (hlo->custom_call_target() == "SliceToDynamic" || + hlo->custom_call_target() == "PadToStatic") { + // Those ops support are created to handle dynamic tensors so by their + // nature they support dynamic lowering. + return Status::OK(); + } + + return DefaultAction(hlo); +} + } // namespace StatusOr DynamicPadder::Run(HloModule* module) { @@ -1137,11 +1209,20 @@ StatusOr DynamicPadder::Run(HloModule* module) { })); TF_RETURN_IF_ERROR(InsertPadToStaticAfterModuleInputs(module)); - TF_ASSIGN_OR_RETURN(DynamicDimensionInference dynamic_dimension_inference, - DynamicDimensionInference::Run(module)); + TF_ASSIGN_OR_RETURN( + DynamicDimensionInference dynamic_dimension_inference, + DynamicDimensionInference::Run(module, custom_call_handler_)); for (HloComputation* computation : module->computations()) { for (HloInstruction* inst : computation->MakeInstructionPostOrder()) { + OpDynamismSupport has_dynamism_support = OpDynamismSupport::kNoSupport; + if (op_supports_dynamism_handler_ != nullptr) { + has_dynamism_support = op_supports_dynamism_handler_(inst); + } + // This op support dynamic lowering, no padding is required. 
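Summarizing the visitor above: DefaultAction chooses a conversion from whether any operand is dynamic and what the dynamism handler reports for the op. A standalone sketch of that decision, with invented names; OpDynamismSupport is the enum added to dynamic_padder.h in this change:

```cpp
// Standalone summary of DynamicShapeRemovingVisitor::DefaultAction above.
#include "tensorflow/compiler/xla/service/dynamic_padder.h"

namespace xla {

enum class Conversion { kNone, kOperandsToStatic, kOperandsToDynamic };

Conversion DecideConversion(bool input_is_dynamic, OpDynamismSupport support) {
  if (support == OpDynamismSupport::kNoSupport) {
    // Static-only lowering: dynamic operands are routed through PadToStatic
    // and the op's own shape is cleared of dynamic dimensions.
    return input_is_dynamic ? Conversion::kOperandsToStatic : Conversion::kNone;
  }
  if (support == OpDynamismSupport::kRequired && !input_is_dynamic) {
    // Dynamic-only lowering: static operands are wrapped with SliceToDynamic.
    return Conversion::kOperandsToDynamic;
  }
  // kOptional ops, or operands already in the form the op expects: no change.
  return Conversion::kNone;
}

}  // namespace xla
```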
+ if (has_dynamism_support != OpDynamismSupport::kNoSupport) { + continue; + } if (inst->opcode() == HloOpcode::kConcatenate) { TF_ASSIGN_OR_RETURN( changed, RewriteDynamicConcat(inst, &dynamic_dimension_inference)); @@ -1152,6 +1233,11 @@ StatusOr DynamicPadder::Run(HloModule* module) { changed, RewriteDynamicSort(inst, &dynamic_dimension_inference)); continue; } + if (inst->opcode() == HloOpcode::kReshape) { + TF_ASSIGN_OR_RETURN( + changed, RewriteDynamicReshape(inst, &dynamic_dimension_inference)); + continue; + } for (int64 operand_num = 0; operand_num < inst->operand_count(); ++operand_num) { HloInstruction* original_operand = inst->mutable_operand(operand_num); @@ -1160,11 +1246,6 @@ StatusOr DynamicPadder::Run(HloModule* module) { continue; } - if (inst->opcode() == HloOpcode::kReshape) { - TF_ASSIGN_OR_RETURN(changed, RewriteDynamicReshape( - inst, &dynamic_dimension_inference)); - continue; - } for (int64 input_dim = 0; input_dim < operand->shape().rank(); ++input_dim) { HloInstruction* operand_dynamic_size = @@ -1195,37 +1276,28 @@ StatusOr DynamicPadder::Run(HloModule* module) { } } } - if (slice_dynamic_output_) { - TF_RETURN_IF_ERROR(InsertSliceToDynamicBeforeModuleOutputs( - dynamic_dimension_inference, module)); - } - // Remove all dynamic dimensions after entry parameter and root instruction -- - // Dynamic padder will produce an equivalent static shaped graph. - for (HloComputation* computation : module->computations()) { - if (computation == module->entry_computation()) { - TF_RETURN_IF_ERROR(DynamicShapeRemovingVisitor::Run(computation)); - } else { - for (HloInstruction* inst : computation->MakeInstructionPostOrder()) { - bool operand_is_dynamic = false; - for (auto* operand : inst->operands()) { - if (!operand->shape().is_static()) { - operand_is_dynamic = true; - } - } - if (!operand_is_dynamic) { - inst->mutable_shape()->clear_dynamic_dimensions(); - } - } - } + // There are ops that only support dynamic lowering and ops that only support + // static lowering, add dynamic<->static tensor conversion around the boundary + // between those ops, as well as the root instruction. + auto computations = module->MakeComputationPostOrder(); + // Reverse postorder so that if caller doesn't support dynamic tensor (while, + // etc), change their called computation to only take static tensors. + for (auto it = computations.rbegin(); it != computations.rend(); ++it) { + HloComputation* computation = *it; + // if slice_dynamic_output_ is set and this is entry computation, we need + // the output tensor to be in dynamic form. + bool require_dynamic_output = + slice_dynamic_output_ && computation == module->entry_computation(); + TF_RETURN_IF_ERROR(DynamicShapeRemovingVisitor::Run( + computation, op_supports_dynamism_handler_, dynamic_dimension_inference, + /*require_dynamic_output=*/require_dynamic_output)); } HloDCE dce; TF_ASSIGN_OR_RETURN(changed, dce.Run(module)); - VLOG(2) << "Post DynamicPadder HLO:"; XLA_VLOG_LINES(2, module->ToString()); - return changed; } diff --git a/tensorflow/compiler/xla/service/dynamic_padder.h b/tensorflow/compiler/xla/service/dynamic_padder.h index f0f3eed0a26..ca2513eaa5c 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.h +++ b/tensorflow/compiler/xla/service/dynamic_padder.h @@ -36,12 +36,38 @@ namespace xla { // Dynamic_padder removes dynamic shapes from the entry computation, and inserts // custom calls (with dynamic shapes), which are lowered by specialized // emitters: PadToStatic and SliceToDynamic. 
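The handler types and constructor parameters added below give backends a hook to declare which ops have a native dynamic lowering, so the padder skips rewriting them. A hedged sketch of wiring that up in a pass pipeline; the handler, the custom-call target, and the helper function are all invented, and only the constructor signature comes from this change:

```cpp
// Hedged sketch (invented names) of using the extended DynamicPadder
// constructor added below.
#include "tensorflow/compiler/xla/service/dynamic_padder.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h"

namespace xla {

OpDynamismSupport MyBackendDynamismHandler(HloInstruction* hlo) {
  // Hypothetical policy: only the custom call "MyDynamicOp" is lowered
  // dynamically; everything else gets padded to its static bound as before.
  if (hlo->opcode() == HloOpcode::kCustomCall &&
      hlo->custom_call_target() == "MyDynamicOp") {
    return OpDynamismSupport::kRequired;
  }
  return OpDynamismSupport::kNoSupport;
}

void AddDynamicPadderPass(HloPassPipeline* pipeline) {
  pipeline->AddPass<DynamicPadder>(
      /*slice_dynamic_output=*/true,
      /*custom_call_handler=*/nullptr,
      /*op_supports_dynamism_handler=*/MyBackendDynamismHandler);
}

}  // namespace xla
```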
+ +// Each instruction can have one of the three modes in supporting dynamic +// lowering. +enum OpDynamismSupport { + // There is no support for dynamic lowering -- dynamic padder will make sure + // the input to that op has static bound by rewriting the op (e.g, extra space + // in reduce_sum will be padded with 0). + kNoSupport = 0, + // The op can take either dynamic input or static input. + kOptional, + // The op only has a dynamic lowering, dynamic padder will make sure the input + // to this op is in dynamic form. + kRequired, +}; + class DynamicPadder : public HloModulePass { public: + // Returns true if given instruction supports native dynamic lowering. If so, + // dynamic padder will not attempt to pad it. + using OpSupportsDynamismHandler = + std::function; + // If `slice_dynamic_output` is true, insert 'slice_to_dynamic' ops to all // outputs that are inferred to be dynamic. - explicit DynamicPadder(bool slice_dynamic_output = true) - : slice_dynamic_output_(slice_dynamic_output) {} + explicit DynamicPadder( + bool slice_dynamic_output = true, + DynamicDimensionInference::CustomCallInferenceHandler + custom_call_handler = nullptr, + OpSupportsDynamismHandler op_supports_dynamism_handler = nullptr) + : slice_dynamic_output_(slice_dynamic_output), + custom_call_handler_(custom_call_handler), + op_supports_dynamism_handler_(op_supports_dynamism_handler) {} absl::string_view name() const override { return "dynamic_padder"; } @@ -51,6 +77,13 @@ class DynamicPadder : public HloModulePass { // Insert 'slice_to_dynamic' ops to all outputs that are inferred to be // dynamic. bool slice_dynamic_output_; + + // A handler for dynamic dimension inference of custom calls. + DynamicDimensionInference::CustomCallInferenceHandler custom_call_handler_; + + // A handler to indicate if a given hlo instruction support native dynamism + // lowering. + OpSupportsDynamismHandler op_supports_dynamism_handler_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/dynamic_padder_test.cc b/tensorflow/compiler/xla/service/dynamic_padder_test.cc index 31ae1ab60fd..e4c70317f2b 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder_test.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder_test.cc @@ -44,12 +44,49 @@ namespace op = xla::testing::opcode_matchers; namespace xla { namespace { +OpDynamismSupport OpHasDynamismSupport(HloInstruction* hlo) { + if (hlo->opcode() != HloOpcode::kCustomCall) { + return OpDynamismSupport::kNoSupport; + } + if (hlo->custom_call_target() == "OpWithDynamicLowering") { + return OpDynamismSupport::kRequired; + } + return OpDynamismSupport::kNoSupport; +} + +Status CustomCallDynamicDimensionInference( + HloInstruction* hlo, DynamicDimensionInference* inferencer) { + if (hlo->custom_call_target() == "OpWithDynamicLowering") { + if (hlo->shape().IsTuple()) { + // Use the operand's dynamic size as output dynamic size. + HloInstruction* dynamic_size = + inferencer->GetDynamicSize(hlo->mutable_operand(0), {1}, 0); + inferencer->SetDynamicSize(hlo, {1}, 0, dynamic_size); + } else { + // Use the operand's dynamic size as output dynamic size. 
+ HloInstruction* dynamic_size = + inferencer->GetDynamicSize(hlo->mutable_operand(0), {}, 0); + inferencer->SetDynamicSize(hlo, {}, 0, dynamic_size); + } + } + + return Status::OK(); +} + class DynamicPadderTest : public HloTestBase { protected: DynamicPadderTest() : HloTestBase() { module_ = CreateNewVerifiedModule(); } + std::unique_ptr GetHloModule(const string& hlo_text) { + std::unique_ptr module = + ParseAndReturnVerifiedModule(hlo_text).ValueOrDie(); + return module; + } + StatusOr RunPadder() { - DynamicPadder padder; + DynamicPadder padder(/*slice_dynamic_output=*/true, + CustomCallDynamicDimensionInference, + OpHasDynamismSupport); return padder.Run(module_.get()); } @@ -105,6 +142,120 @@ TEST_F(DynamicPadderTest, ReduceTest) { ExpectPadded(reduce->operand(0)); } +TEST_F(DynamicPadderTest, DynamicLoweringTest) { + const string hlo_text = R"( +HloModule DynamicLowering + +ENTRY main { + param = s32[5] parameter(0) + const = s32[] constant(3) + param_padded = s32[<=5] set-dimension-size(param, const), + dimensions={0} + custom-call.1 = s32[<=5] custom-call(param_padded), + custom_call_target="OpWithDynamicLowering" + custom-call.2 = s32[<=5] custom-call(custom-call.1), + custom_call_target="OpWithDynamicLowering" + // Negate doesn't support dynamic lowering. + ROOT negate = s32[<=5] negate(custom-call.2) +} +)"; + + module_ = GetHloModule(hlo_text); + + TF_ASSERT_OK(RunPadder().status()); + // After rewrite, we should have : + // + // param + // | + // SliceToDynamic + // | + // OpWithDynamicLowering (custom_call_1) + // | + // OpWithDynamicLowering (custom_call_2) + // | + // PadToStatic + // | + // Negate + // | + // SliceToDynamic // Root require dynamic form tensor. + auto custom_call_1 = + module_->entry_computation()->GetInstructionWithName("custom-call.1"); + auto custom_call_2 = + module_->entry_computation()->GetInstructionWithName("custom-call.2"); + // Test that the input to custom call + HloInstruction* slice_to_dynamic = custom_call_1->mutable_operand(0); + ASSERT_THAT(slice_to_dynamic->opcode(), HloOpcode::kCustomCall); + ASSERT_THAT(slice_to_dynamic->custom_call_target(), "SliceToDynamic"); + ASSERT_EQ(custom_call_2->user_count(), 1); + HloInstruction* pad_to_static = custom_call_2->users()[0]; + ASSERT_THAT(pad_to_static->opcode(), HloOpcode::kCustomCall); + ASSERT_THAT(pad_to_static->custom_call_target(), "PadToStatic"); + slice_to_dynamic = module_->entry_computation()->root_instruction(); + ASSERT_THAT(slice_to_dynamic->opcode(), HloOpcode::kCustomCall); + ASSERT_THAT(slice_to_dynamic->custom_call_target(), "SliceToDynamic"); +} + +TEST_F(DynamicPadderTest, DynamicLoweringTestTupleInput) { + const string hlo_text = R"( +HloModule DynamicLowering + +ENTRY main { + param = s32[5] parameter(0) + const = s32[] constant(3) + param_padded = s32[<=5] set-dimension-size(param, const), + dimensions={0} + // Create a tuple with static and dynamic componenet. + tuple_arg = (s32[], s32[<=5]) tuple(const, param_padded) + custom-call.1 = (s32[], s32[<=5]) custom-call(tuple_arg), + custom_call_target="OpWithDynamicLowering" + custom-call.2 = (s32[], s32[<=5]) custom-call(custom-call.1), + custom_call_target="OpWithDynamicLowering" + data = s32[<=5]{0} get-tuple-element(custom-call.2), index=1 + // Negate doesn't support dynamic lowering. 
+ ROOT negate = s32[<=5] negate(data) +} +)"; + + module_ = GetHloModule(hlo_text); + + TF_ASSERT_OK(RunPadder().status()); + // After rewrite, we should have : + // + // param + // | + // SliceToDynamic + // | + // Tuple + // | + // OpWithDynamicLowering (custom_call_1) + // | + // OpWithDynamicLowering (custom_call_2) + // | + // GTE + // | + // PadToStatic + // | + // Negate + // | + // SliceToDynamic // Root require dynamic form tensor. + + auto* root = module_->entry_computation()->root_instruction(); + EXPECT_THAT(root, + op::CustomCall("SliceToDynamic", op::Negate(), op::Constant())); + HloInstruction* negate = root->mutable_operand(0); + EXPECT_THAT( + negate, + op::Negate(op::GetTupleElement(op::CustomCall( + "PadToStatic", op::GetTupleElement(op::CustomCall( + "OpWithDynamicLowering", ::testing::_)))))); + auto custom_call_1 = + module_->entry_computation()->GetInstructionWithName("custom-call.1"); + EXPECT_THAT(custom_call_1, + op::CustomCall( + "OpWithDynamicLowering", + op::Tuple(op::Constant(), op::CustomCall("SliceToDynamic")))); +} + TEST_F(DynamicPadderTest, ConvolutionTest) { auto builder = HloComputation::Builder(TestName()); constexpr int xdim = 3; diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 3eb6dab3129..8cb660de46c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -461,6 +461,8 @@ StatusOr ElementalIrEmitter::EmitFloatUnaryOp( return EmitSqrt(op->shape().element_type(), operand_value); case HloOpcode::kRsqrt: return EmitRsqrt(op->shape().element_type(), operand_value); + case HloOpcode::kCbrt: + return EmitCbrt(op->shape().element_type(), operand_value); case HloOpcode::kFloor: return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::floor, {operand_value}, @@ -787,6 +789,9 @@ StatusOr ElementalIrEmitter::EmitComplexUnaryOp( case HloOpcode::kRsqrt: { return EmitComplexRsqrt(op, component_type, operand_value); } + case HloOpcode::kCbrt: { + return EmitComplexCbrt(op, component_type, operand_value); + } case HloOpcode::kNegate: return EmitComposeComplex(op, FNeg(EmitExtractReal(operand_value)), FNeg(EmitExtractImag(operand_value))); @@ -1081,6 +1086,19 @@ StatusOr ElementalIrEmitter::EmitComplexRsqrt( return EmitComposeComplex(op, real_part, imag_part); } +// +// Using EmitComplexPower with c=1.0/3.0 and d=0 +StatusOr ElementalIrEmitter::EmitComplexCbrt( + const HloInstruction* op, PrimitiveType prim_type, + llvm::Value* operand_value) { + auto type = llvm_ir::PrimitiveTypeToIrType(prim_type, module_); + auto third = llvm::ConstantFP::get(type, 1.0 / 3.0); + auto zero = llvm::ConstantFP::get(type, 0); + llvm::Value* a = EmitExtractReal(operand_value); + llvm::Value* b = EmitExtractImag(operand_value); + return EmitComplexPower(op, a, b, third, zero); +} + // (a+bi)^(c+di) = // (a*a+b*b)^(0.5c) * exp(-d*atan2(b,a)) * (cos(q) + i*sin(q)), // where q = c*atan2(b,a)+0.5d*ln(a*a+b*b) @@ -1392,6 +1410,19 @@ StatusOr ElementalIrEmitter::EmitPow(PrimitiveType prim_type, {lhs->getType()}, b_); } +StatusOr ElementalIrEmitter::EmitCbrt(PrimitiveType prim_type, + llvm::Value* value) { + auto type = llvm_ir::PrimitiveTypeToIrType(prim_type, module_); + auto third = llvm::ConstantFP::get(type, 1.0 / 3.0); + auto abs_value = + llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {value}, {type}, b_); + TF_ASSIGN_OR_RETURN(llvm::Value * abs_res, + EmitPow(prim_type, abs_value, third)); + auto signed_res = 
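    // Why the fabs/copysign sequence here (a worked example, not new
    // behavior): for real inputs, pow(x, 1/3) yields NaN for negative x,
    // because 1/3 is a non-integer exponent; pow(-8.0, 1.0/3.0) is NaN.
    // Taking pow(|x|, 1/3) and restoring the sign with copysign recovers the
    // real cube root instead:
    //   x = -8:  pow(8, 1/3) = 2,  copysign(2, -8) = -2.
    // The complex path (EmitComplexCbrt above) instead goes through
    // EmitComplexPower with exponent 1/3 + 0i, which by the (a+bi)^(c+di)
    // formula quoted above returns the principal cube root, e.g. for -8 + 0i:
    //   2 * (cos(pi/3) + i*sin(pi/3)) = 1 + 1.732i,
    // not -2, matching the usual complex cbrt convention.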
llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::copysign, + {abs_res, value}, {type}, b_); + return signed_res; +} + StatusOr ElementalIrEmitter::EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) { @@ -2181,6 +2212,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: return [this, hlo, &operand_to_generator]( const IrArray::Index& index) -> StatusOr { @@ -2390,6 +2422,43 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( -> StatusOr { return EmitElementalDot(hlo, operand_to_generator, dot_result_index); }; + case HloOpcode::kMap: + return [this, hlo, &operand_to_generator]( + const IrArray::Index& index) -> StatusOr { + std::vector operands; + for (int i = 0; i < hlo->operand_count(); i++) { + TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, + operand_to_generator.at(hlo->operand(i))(index)); + operands.push_back(operand_value); + } + std::vector input_generators; + for (const HloInstruction* instr : hlo->operands()) { + input_generators.push_back(operand_to_generator.at(instr)); + } + return EmitElementalMap(Cast(hlo), operands); + }; + case HloOpcode::kReduceWindow: + return [this, hlo, &operand_to_generator](const IrArray::Index& index) { + return EmitElementalReduceWindow( + Cast(hlo), + operand_to_generator.at(hlo->operand(0)), + operand_to_generator.at(hlo->operand(1)), index); + }; + case HloOpcode::kReduce: + return [this, hlo, &operand_to_generator](const IrArray::Index& index) { + auto reduce_instr = Cast(hlo); + std::vector input_generators; + for (const HloInstruction* instr : reduce_instr->inputs()) { + input_generators.push_back(operand_to_generator.at(instr)); + } + + std::vector initial_value_generators; + for (const HloInstruction* instr : reduce_instr->init_values()) { + initial_value_generators.push_back(operand_to_generator.at(instr)); + } + return EmitElementalReduce(reduce_instr, std::move(input_generators), + std::move(initial_value_generators), index); + }; default: return [hlo](const IrArray::Index& index) { return Unimplemented("Unhandled opcode for elemental IR emission: %s", @@ -2419,4 +2488,215 @@ llvm::Value* ElementalIrEmitter::EmitComposeComplex(const HloInstruction* op, return complex; } +StatusOr ElementalIrEmitter::EmitElementalMap( + const HloMapInstruction* map_instr, + absl::Span elemental_operands) { + TF_ASSIGN_OR_RETURN( + std::vector values, + EmitThreadLocalCall(*map_instr->to_apply(), elemental_operands, + llvm_ir::IrName(map_instr))); + CHECK_EQ(values.size(), 1); + return values[0]; +} + +StatusOr ElementalIrEmitter::EmitElementalReduceWindow( + const HloReduceWindowInstruction* reduce_window, + const llvm_ir::ElementGenerator& input_generator, + const llvm_ir::ElementGenerator& initial_value_generator, + const llvm_ir::IrArray::Index& index) { + // Pseudocode: + // for each index I in output + // value = init_value + // for each index W in window + // for each dimension i from 0 to rank - 1 + // (input index I)[i] = O[i] * stride[i] + W[i] - pad_low[i] + // if I in bounds of input + // value = function(value, input[I]) + // output[O] = value + const HloInstruction* operand = reduce_window->operand(0); + const Window& window = reduce_window->window(); + + PrimitiveType operand_element_type = operand->shape().element_type(); + llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( + llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), + 
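      // (The accumulator is emitted as a function-entry alloca so that it
      // dominates the window loop nest below; it is written once with the
      // init value and then updated by the reducer in the innermost loop.)
      //
      // A concrete instance of the index mapping in the pseudocode above,
      // with illustrative values only: stride 2, window_dilation 1,
      // padding_low 1, base_dilation 1. Output index O = 4 and window index
      // W = 2 then read input index
      //   I = O * stride + W * window_dilation - padding_low
      //     = 4 * 2 + 2 * 1 - 1 = 9,
      // and any I that falls outside [0, bound) or on a dilated-base gap is
      // skipped by the in_bounds checks below.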
"reduce_window_accum_ptr", b_); + { + TF_ASSIGN_OR_RETURN( + llvm::Value* const init_value, + initial_value_generator(llvm_ir::IrArray::Index(index.GetType()))); + Store(init_value, accum_ptr); + } + + llvm::Type* index_type = index.GetType(); + auto index_typed_const = [&](uint64 c) -> llvm::Constant* { + return index.GetConstantWithIndexType(c); + }; + + llvm_ir::ForLoopNest loops(IrName(reduce_window), b_, index_type); + std::vector window_size; + for (const auto& dim : window.dimensions()) { + window_size.push_back(dim.size()); + } + const IrArray::Index window_index = loops.AddLoopsForShape( + ShapeUtil::MakeShape(operand_element_type, window_size), "window"); + CHECK_EQ(window_index.size(), index.size()); + + SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b_); + + std::vector input_multi_index(index.size()); + llvm::Value* in_bounds = b_->getInt1(true); + for (size_t i = 0; i < index.size(); ++i) { + llvm::Value* stridden_index = + NSWMul(index[i], index_typed_const(window.dimensions(i).stride())); + input_multi_index[i] = NSWSub( + NSWAdd( + stridden_index, + NSWMul(window_index[i], + index_typed_const(window.dimensions(i).window_dilation()))), + index_typed_const(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = + ICmpEQ(SRem(input_multi_index[i], + index_typed_const(window.dimensions(i).base_dilation())), + index_typed_const(0)); + in_bounds = And(in_bounds, dilation_condition); + + // Apply base dilation to the index. + input_multi_index[i] = + SDiv(input_multi_index[i], + index_typed_const(window.dimensions(i).base_dilation())); + + // We must check whether 0 <= input_multi_index[i] < bound, as + // otherwise we are in the pad and so can skip the computation. This + // comparison is equivalent to the unsigned comparison + // input_multi_index[i] < bound, as a negative value wraps to a large + // positive value. + in_bounds = And(in_bounds, + ICmpULT(input_multi_index[i], + index_typed_const(operand->shape().dimensions(i)))); + } + + llvm_ir::LlvmIfData if_data = + llvm_ir::EmitIfThenElse(in_bounds, "in_bounds", b_); + SetToFirstInsertPoint(if_data.true_block, b_); + + // We are not in pad, so do the computation. + IrArray::Index input_index(input_multi_index, operand->shape(), index_type); + TF_ASSIGN_OR_RETURN(llvm::Value * input_value, input_generator(input_index)); + TF_ASSIGN_OR_RETURN( + std::vector accum_values, + EmitThreadLocalCall(*reduce_window->to_apply(), + {Load(accum_ptr), input_value}, "reducer_function")); + CHECK_EQ(accum_values.size(), 1); + Store(accum_values[0], accum_ptr); + + SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b_); + return Load(accum_ptr); +} + +StatusOr ElementalIrEmitter::EmitElementalReduce( + const HloReduceInstruction* reduce, + std::vector input_generators, + std::vector initial_value_generators, + const llvm_ir::IrArray::Index& index) { + const Shape& out_shape = reduce->shape(); + bool is_variadic = !out_shape.IsArray(); + int accumulators_count = 1; + if (is_variadic) { + CHECK(out_shape.IsTuple()); + accumulators_count = out_shape.tuple_shapes_size(); + } + + absl::Span reduced_dimensions(reduce->dimensions()); + + std::vector accumulator_addrs; + std::vector accumulator_types; + llvm::Type* index_type = index.GetType(); + for (int i = 0; i < accumulators_count; i++) { + const Shape& element_shape = + is_variadic ? 
out_shape.tuple_shapes(i) : out_shape; + PrimitiveType accumulator_type = element_shape.element_type(); + llvm::Type* accumulator_llvm_type = + llvm_ir::PrimitiveTypeToIrType(accumulator_type, module_); + accumulator_types.push_back(accumulator_llvm_type); + + // Initialize an accumulator with init_value. + llvm::AllocaInst* accumulator_addr = llvm_ir::EmitAllocaAtFunctionEntry( + accumulator_llvm_type, "accumulator_" + std::to_string(i), b()); + TF_ASSIGN_OR_RETURN( + llvm::Value* const init_value, + initial_value_generators[i](llvm_ir::IrArray::Index(index_type))); + Store(init_value, accumulator_addr); + accumulator_addrs.push_back(accumulator_addr); + } + + // The enclosing loops go over all the target elements. Now we have to compute + // the actual target element. For this, we build a new loop nest to iterate + // over all the reduction dimensions in the argument. + // AddLoopsForShapeOnDimensions will return an Index where induction Value*s + // are placed for each dimension in dimensions, and all the rest are nullptrs. + llvm_ir::ForLoopNest loops(IrName(reduce, "inner"), b(), index_type); + const HloInstruction* arg = reduce->operand(0); + std::vector input_multi_index = + loops.AddLoopsForShapeOnDimensions(arg->shape(), reduced_dimensions, + "reduction_dim"); + + SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b()); + + // Build a full index for the input argument, using input_multi_index as the + // base. In input_multi_index only the reduction dimensions are filled in. We + // fill in the rest of the dimensions with induction Value*s taken from + // 'index' which iterates over the target array. See the high-level + // description in the XLA documentation for details. + auto it = index.begin(); + + for (auto& i : input_multi_index) { + if (i == nullptr) { + i = *it++; + } + } + CHECK(index.end() == it); + llvm_ir::IrArray::Index input_index(input_multi_index, arg->shape(), + index_type); + + std::vector reduction_operands; + for (llvm::Value* accum : accumulator_addrs) { + llvm::Value* accum_value = Load(accum); + reduction_operands.push_back(accum_value); + } + + for (int i = 0; i < accumulators_count; i++) { + TF_ASSIGN_OR_RETURN(llvm::Value* const input_element, + input_generators[i](input_index)); + reduction_operands.push_back(input_element); + } + + TF_ASSIGN_OR_RETURN( + std::vector results, + EmitThreadLocalCall(*reduce->to_apply(), reduction_operands, + "reduce_function")); + + CHECK(results.size() == accumulators_count); + for (int i = 0; i < accumulators_count; i++) { + Store(results[i], accumulator_addrs[i]); + } + SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b()); + + if (is_variadic) { + // Emit a structure, as that what the LoopEmitter expects. + llvm::Value* returned_structure = llvm::UndefValue::get( + llvm::StructType::get(b()->getContext(), accumulator_types)); + for (int i = 0; i < accumulators_count; i++) { + llvm::Value* accumulator_value = Load(accumulator_addrs[i]); + returned_structure = + b()->CreateInsertValue(returned_structure, accumulator_value, i); + } + return returned_structure; + } else { + CHECK_EQ(accumulator_addrs.size(), 1); + return Load(accumulator_addrs[0]); + } +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index 99833a5525f..06a9d7b194c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -17,12 +17,17 @@ limitations under the License. 
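// A note on the variadic case handled by EmitElementalReduce above (an
// illustrative sketch, not an exhaustive contract): for a two-operand reduce,
// e.g. a max-and-argmax style reduction, out_shape is a tuple of two arrays,
// so accumulators_count == 2 and two entry-block accumulators are allocated
// and seeded with their init values. The reducer is then invoked as
//   reduce_function(acc0, acc1, input0[I], input1[I]) -> (new_acc0, new_acc1)
// i.e. all current accumulator values followed by all input elements, and
// each result is stored back into its accumulator. Because the LoopEmitter
// expects a single llvm::Value*, the final accumulator values are packed into
// an LLVM struct, which is what the is_variadic branch at the end builds.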
#define TENSORFLOW_COMPILER_XLA_SERVICE_ELEMENTAL_IR_EMITTER_H_ #include +#include +#include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_builder_mixin.h" #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h" #include "tensorflow/compiler/xla/statusor.h" @@ -116,6 +121,9 @@ class ElementalIrEmitter : public IrBuilderMixin { virtual StatusOr EmitSqrt(PrimitiveType prim_type, llvm::Value* value); + virtual StatusOr EmitCbrt(PrimitiveType prim_type, + llvm::Value* value); + virtual StatusOr EmitRsqrt(PrimitiveType prim_type, llvm::Value* value); @@ -159,6 +167,10 @@ class ElementalIrEmitter : public IrBuilderMixin { PrimitiveType prim_type, llvm::Value* operand_value); + virtual StatusOr EmitComplexCbrt(const HloInstruction* op, + PrimitiveType prim_type, + llvm::Value* operand_value); + virtual StatusOr EmitComplexRsqrt(const HloInstruction* op, PrimitiveType prim_type, llvm::Value* operand_value); @@ -213,6 +225,26 @@ class ElementalIrEmitter : public IrBuilderMixin { const HloToElementGeneratorMap& operand_to_generator, const llvm_ir::IrArray::Index& dot_result_index); + virtual StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view name) = 0; + + StatusOr EmitElementalMap( + const HloMapInstruction* map_instr, + absl::Span elemental_operands); + + StatusOr EmitElementalReduceWindow( + const HloReduceWindowInstruction* reduce_window, + const llvm_ir::ElementGenerator& input_generator, + const llvm_ir::ElementGenerator& initial_value_generator, + const llvm_ir::IrArray::Index& index); + + StatusOr EmitElementalReduce( + const HloReduceInstruction* reduce, + std::vector input_generators, + std::vector initial_value_generators, + const llvm_ir::IrArray::Index& index); + llvm::IRBuilder<>* const b_; llvm::Module* module_; diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 61bc41283e1..0f6b2cb72e6 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -684,7 +684,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:autotuning_proto_cc", + "//tensorflow/core/protobuf:autotuning_proto_cc", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "//tensorflow/core/util/proto:proto_utils", @@ -720,7 +720,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:hlo_pass", - "//tensorflow/core:autotuning_proto_cc", + "//tensorflow/core/protobuf:autotuning_proto_cc", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor_no_cuda", @@ -1674,7 +1674,7 @@ tf_proto_library_cc( protodeps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo_proto", - "//tensorflow/core:autotuning_proto", + "//tensorflow/core/protobuf:autotuning_proto", ], ) @@ -1685,8 +1685,8 @@ cc_library( deps = [ ":gpu_autotuning_proto_cc", "//tensorflow/compiler/xla:debug_options_flags", - 
"//tensorflow/core:autotuning_proto_cc", "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core/protobuf:autotuning_proto_cc", "@com_google_absl//absl/container:flat_hash_map", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index c6df786fb51..1be0b1b4e7b 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -305,168 +305,5 @@ llvm::Value* GpuElementalIrEmitter::EmitThreadId() { return NSWAdd(NSWMul(block_id, threads_per_block), thread_id_in_block); } -llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( - const HloInstruction* hlo, - const HloToElementGeneratorMap& operand_to_generator) { - switch (hlo->opcode()) { - case HloOpcode::kMap: - return [=, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - TF_RET_CHECK(!hlo->operands().empty()) - << "Zero operand map not implemented in GPU backend."; - TF_RET_CHECK(hlo->to_apply()->num_parameters() > 0); - std::vector operand_elements; - for (HloInstruction* operand : hlo->operands()) { - TF_ASSIGN_OR_RETURN(llvm::Value * value, - operand_to_generator.at(operand)(index)); - operand_elements.push_back(value); - } - return compute_nested_(*hlo->to_apply(), operand_elements); - }; - case HloOpcode::kReduceWindow: - // Pseudocode: - // for each index I in output - // value = init_value - // for each index W in window - // for each dimension i from 0 to rank - 1 - // (input index I)[i] = O[i] * stride[i] + W[i] - pad_low[i] - // if I in bounds of input - // value = function(value, input[I]) - // output[O] = value - return [=, &operand_to_generator]( - const IrArray::Index& index) -> StatusOr { - const HloInstruction* operand = hlo->operand(0); - const Window& window = hlo->window(); - - PrimitiveType operand_element_type = operand->shape().element_type(); - llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( - llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), - "reduce_window_accum_ptr", b_); - { - TF_ASSIGN_OR_RETURN(llvm::Value * init_value, - operand_to_generator.at(hlo->operand(1))( - IrArray::Index(index.GetType()))); - Store(init_value, accum_ptr); - } - - llvm::Type* index_type = index.GetType(); - auto index_typed_const = [&](uint64 c) -> llvm::Constant* { - return index.GetConstantWithIndexType(c); - }; - - llvm_ir::ForLoopNest loops(IrName(hlo), b_, index_type); - std::vector window_size; - for (const auto& dim : window.dimensions()) { - window_size.push_back(dim.size()); - } - const IrArray::Index window_index = loops.AddLoopsForShape( - ShapeUtil::MakeShape(operand_element_type, window_size), "window"); - CHECK_EQ(window_index.size(), index.size()); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b_); - - std::vector input_multi_index(index.size()); - llvm::Value* in_bounds = b_->getInt1(true); - for (size_t i = 0; i < index.size(); ++i) { - llvm::Value* stridden_index = NSWMul( - index[i], index_typed_const(window.dimensions(i).stride())); - input_multi_index[i] = NSWSub( - NSWAdd(stridden_index, - NSWMul(window_index[i], - index_typed_const( - window.dimensions(i).window_dilation()))), - index_typed_const(window.dimensions(i).padding_low())); - - // We need to verify that we are not in the dilated base area. 
- llvm::Value* dilation_condition = ICmpEQ( - SRem(input_multi_index[i], - index_typed_const(window.dimensions(i).base_dilation())), - index_typed_const(0)); - in_bounds = And(in_bounds, dilation_condition); - - // Apply base dilation to the index. - input_multi_index[i] = - SDiv(input_multi_index[i], - index_typed_const(window.dimensions(i).base_dilation())); - - // We must check whether 0 <= input_multi_index[i] < bound, as - // otherwise we are in the pad and so can skip the computation. This - // comparison is equivalent to the unsigned comparison - // input_multi_index[i] < bound, as a negative value wraps to a large - // positive value. - in_bounds = - And(in_bounds, - ICmpULT(input_multi_index[i], - index_typed_const(operand->shape().dimensions(i)))); - } - - llvm_ir::LlvmIfData if_data = - llvm_ir::EmitIfThenElse(in_bounds, "in_bounds", b_); - SetToFirstInsertPoint(if_data.true_block, b_); - - // We are not in pad, so do the computation. - IrArray::Index input_index(input_multi_index, operand->shape(), - index_type); - TF_ASSIGN_OR_RETURN(llvm::Value * input_value, - operand_to_generator.at(operand)(input_index)); - TF_ASSIGN_OR_RETURN( - llvm::Value * accum_value, - compute_nested_(*hlo->to_apply(), {Load(accum_ptr), input_value})); - Store(accum_value, accum_ptr); - - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b_); - return Load(accum_ptr); - }; - case HloOpcode::kReduce: - // TODO(b/118332391): This should be supported. - CHECK_EQ(hlo->operand_count(), 2) << "Did not expect variadic reduce"; - return [=, &operand_to_generator]( - const IrArray::Index& output_index) -> StatusOr { - const HloInstruction* operand = hlo->operand(0); - llvm::Value* accum_ptr = - b()->CreateAlloca(llvm_ir::PrimitiveTypeToIrType( - hlo->shape().element_type(), module_)); - llvm::Type* index_type = output_index.GetType(); - TF_ASSIGN_OR_RETURN(llvm::Value * init_value, - operand_to_generator.at(hlo->operand(1))( - IrArray::Index(index_type))); - b()->CreateStore(init_value, accum_ptr); - - llvm_ir::ForLoopNest loops(IrName(hlo), b_, index_type); - std::vector input_multi_index = - loops.AddLoopsForShapeOnDimensions( - operand->shape(), hlo->dimensions(), "reduction_dim"); - if (!ShapeUtil::IsScalar(hlo->shape())) { - // Here only input_multi_index[hlo->dimensions()] are non-null, so we - // must set the rest. 
- size_t j = 0; - for (auto& i : input_multi_index) { - if (i == nullptr) { - i = output_index[j++]; - } - } - CHECK_EQ(output_index.size(), j); - } - llvm_ir::IrArray::Index input_index( - input_multi_index, hlo->operand(0)->shape(), index_type); - - SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), b()); - TF_ASSIGN_OR_RETURN( - llvm::Value * input_value, - operand_to_generator.at(hlo->operand(0))(input_index)); - TF_ASSIGN_OR_RETURN( - llvm::Value * accum_value, - compute_nested_(*hlo->to_apply(), - {b()->CreateLoad(accum_ptr), input_value})); - b()->CreateStore(accum_value, accum_ptr); - SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), b()); - return b()->CreateLoad(accum_ptr); - }; - default: - return ElementalIrEmitter::MakeElementGenerator(hlo, - operand_to_generator); - } -} - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h index c8a58a21980..3c4e9f7c1e6 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h @@ -47,10 +47,6 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { llvm::Module* module, llvm::IRBuilder<>* b, NestedComputer compute_nested); - llvm_ir::ElementGenerator MakeElementGenerator( - const HloInstruction* hlo, - const HloToElementGeneratorMap& operand_to_generator) override; - protected: StatusOr EmitFloatBinaryOp(const HloInstruction* op, llvm::Value* lhs_value, @@ -92,6 +88,17 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitComplexAbs(PrimitiveType prim_type, llvm::Value* value) override; + StatusOr> EmitThreadLocalCall( + const HloComputation& callee, absl::Span parameters, + absl::string_view) override { + // TODO(b/118332391): Supported variadic return values. + auto result = compute_nested_(callee, parameters); + if (!result.ok()) { + return result.status(); + } + return std::vector{result.ValueOrDie()}; + } + llvm::Value* EmitThreadId() override; private: diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index b6c1e671986..5f6dfd7d3a5 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -409,6 +409,16 @@ Status GpuCompiler::OptimizeHloPostLayoutAssignment( pipeline.AddPass>(); } + // GemmRewriter assumes that all transposes are folded into gemms, but, + // since commit 7d529df, this is not always true at this point. + // Therefore, rerun transpose folding. + pipeline.AddPass( + [](const HloInstruction& dot, + const TransposeFolding::OperandIndices& candidate_operands) { + return IsMatrixMultiplication(dot) ? candidate_operands + : TransposeFolding::OperandIndices{}; + }, + TransposeFolding::NeverFoldTranspose); // Rewrite GEMMs into custom calls. pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index ec5f10bd2e8..a78ffc8dd1a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -2016,7 +2016,9 @@ void IrEmitterUnnested::EmitTile( // True iff all threads always execute all instructions in the tiling // dimension X. 
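  // The extra GetRowContiguous() condition added below is conservative: when
  // the rows of a tile are not contiguous in memory (EmitHlo021Tile further
  // down now builds its KernelMappingScheme with /*is_row_contiguous=*/false),
  // evenly dividing the X dimension is no longer taken as proof that every
  // thread stays in bounds, so the bounds check is kept. This is consistent
  // with the new gpu_copy_alone_test added later in this change, which
  // expects no vectorized ld.global.nc.v2 loads for a transposed copy.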
- bool x_tile_fits = mapping_scheme.GetDimsInElems()[kDimX] % tile_size_x == 0; + bool x_tile_fits = + mapping_scheme.GetDimsInElems()[kDimX] % tile_size_x == 0 && + mapping_scheme.GetRowContiguous(); // The outer loop below is simply doing: // @@ -2731,7 +2733,8 @@ void IrEmitterUnnested::EmitHlo021Tile( /*num_threads_y=*/kNumRows, /*num_threads_x=*/kWarpSize, /*indexing_order=*/kLinearIndexingX, - /*vector_size=*/1); + /*vector_size=*/1, + /*is_row_contiguous=*/false); LaunchDimensions launch_dimensions(mapping_scheme.GetNumberOfBlocks(), mapping_scheme.GetThreadsPerBlock()); llvm::Type* index_type = diff --git a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h index 5e15d0767a1..d5c4ecbc795 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h @@ -90,13 +90,14 @@ class KernelMappingScheme { KernelMappingScheme(absl::Span dims_in_elems, absl::Span tile_sizes, int64 num_threads_y, int64 num_threads_x, IndexingOrder indexing_order, - int vector_size) + int vector_size, bool is_row_contiguous = false) : dims_in_elems_{dims_in_elems[0], dims_in_elems[1], dims_in_elems[2]}, tile_sizes_{tile_sizes[0], tile_sizes[1], tile_sizes[2]}, num_threads_x_(num_threads_x), num_threads_y_(num_threads_y), indexing_order_(indexing_order), - vector_size_(vector_size) { + vector_size_(vector_size), + is_row_contiguous_(is_row_contiguous) { CHECK_EQ(tile_sizes[1] % num_threads_y_, 0); CHECK_EQ(tile_sizes[2] % num_threads_x_, 0); VLOG(10) << "dims_in_elems_ = " << absl::StrJoin(dims_in_elems_, ","); @@ -134,6 +135,7 @@ class KernelMappingScheme { IndexingOrder GetIndexingOrder() const { return indexing_order_; } int GetVectorSize() const { return vector_size_; } + bool GetRowContiguous() const { return is_row_contiguous_; } private: // The number of elements in each dimension. @@ -159,6 +161,7 @@ class KernelMappingScheme { // to trigger vectorized loads on GPUs while keeping memory // coalescing. const int vector_size_; + const bool is_row_contiguous_; }; // Information to support the code generation for a tiled reduction kernel. diff --git a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc index 2d255d76746..aff9e6f162b 100644 --- a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.h" #include // NOLINT (required by TF interfaces) +#include #include #include #include @@ -85,6 +86,11 @@ namespace { using tensorflow::BlockingCounter; +bool IsGlobalNcclConfig() { + static bool global_nccl_config = std::getenv("NCCL_COMM_ID") != nullptr; + return global_nccl_config; +} + // Functions to translate an ncclResult_t/cudaError_t to a Status object. Used // by the macros below. 
Status TranslateStatus(ncclResult_t s, const char* file, int64 line, @@ -285,7 +291,6 @@ class NcclClique { std::vector raw_comms(local_device_ordinals_.size(), nullptr); TF_ASSIGN_OR_RETURN(const absl::optional& nccl_id_string, maybe_nccl_unique_id); - ncclUniqueId nccl_id; if (nccl_id_string) { TF_RETURN_IF_ERROR(StringToNcclUniqueId(*nccl_id_string, &nccl_id)); @@ -416,10 +421,12 @@ RendezvousNcclAllReduce::SubmitParticipantImpl( nccl_unique_id = (*participant.nccl_unique_id_callback)(clique_key); } else { if (participant.rendezvous_key.global_devices.size() != - participant.rendezvous_key.num_local_participants) { + participant.rendezvous_key.num_local_participants && + !IsGlobalNcclConfig()) { nccl_unique_id = InvalidArgument( - "Multihost AllReduce on GPU requires a nccl_unique_id_callback " - "to be provided by the client."); + "If not local devices are taking part of a collective API on " + "GPU, the nccl_unique_id_callback must be provided by the " + "client."); } else { nccl_unique_id = absl::optional(); } @@ -568,6 +575,13 @@ Status NcclAllReduceThunk::ExecuteOnStream(const ExecuteParams& params) { std::vector global_participating_replicas, GetParticipatingReplicas(global_device_id, instr->replica_groups(), replica_count_, *params.device_assn)); + if (IsGlobalNcclConfig() && + global_participating_replicas.size() != replica_count_) { + return InvalidArgument( + "Partial replica groups are not allowed when using NCCL_COMM_ID " + "environment configuration."); + } + std::vector global_devices; std::vector> local_devices; local_devices.reserve(global_participating_replicas.size()); diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 0906c71064e..7ff8d40b440 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -402,10 +402,26 @@ std::vector NVPTXCompiler::CompileGpuAsmOrGetCachedResult( "using $PATH.", hlo_module_config); } + CHECK(hlo_module_config.debug_options() + .xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found()) + << "There was an error when trying to compile ptx into sass " + "code. If you want to try falling back to the GPU driver to " + "jit compile ptx, you can use the flag " + "--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found." + " Use at your own risk though, it has known drawbacks like " + "increased memory consumption."; } else { LOG(ERROR) << "Error during compilation of ptx to sass: " - << maybe_cubin.status() - << ". Falling back to the GPU driver."; + << maybe_cubin.status(); + CHECK(hlo_module_config.debug_options() + .xla_gpu_unsafe_fallback_to_driver_on_ptxas_error()) + << "There was an error when trying to compile ptx into sass " + "code. Up until May 14 2020, XLA silently ignored such " + "errors and fell back to the GPU driver. This is likely to " + "trigger subtle runtime issues and is hence discouraged. 
" + "If you want to temporarily restore this behavior use the " + "flag --xla_gpu_unsafe_fallback_to_driver_on_ptxas_error " + "and file a bug in b/components/366096."; } // We're going to use the driver to JIT our PTX->SASS, so warn if diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index e04dba418d9..7a9845d0f49 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -235,6 +235,20 @@ tf_cc_test( ], ) +tf_cc_test( + name = "gpu_copy_alone_test", + srcs = [ + "gpu_copy_alone_test.cc", + ], + tags = tf_cuda_tests_tags() + ["no_rocm"], + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/core:test_main", + ], +) + tf_cc_test( name = "gpu_ftz_test", srcs = ["gpu_ftz_test.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_copy_alone_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_copy_alone_test.cc new file mode 100644 index 00000000000..1c475ab4e10 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_copy_alone_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" + +namespace xla { +namespace gpu { + +namespace { + +// WARNING: This tests must be alone in its file! Otherwise, the +// error isn't caught. We expect and CUDA_ERROR_ILLEGAL_ADDRESS to be +// thrown with the old buggy code. +class CopyAloneNoOptTest : public GpuCodegenTest { + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest(); + // The test MultiOutputStore contain a MOF fusion and XLA optimizer pass + // doesn't like this. 
+ debug_options.set_xla_disable_all_hlo_passes(true); + return debug_options; + } +}; + +TEST_F(CopyAloneNoOptTest, CopyTranspose) { + const char* hlo_text = R"( +HloModule mod +ENTRY main { + %param = f32[8,32,32,32,16]{4,3,2,1,0} parameter(0) + ROOT %copy = f32[8,32,32,32,16]{3,2,1,4,0} copy(f32[8,32,32,32,16]{4,3,2,1,0} %param) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr optimized_module, + ParseAndReturnVerifiedModule(hlo_text)); + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5})); + + CompileAndOptionallyVerifyPtx(std::move(optimized_module), + R"( +CHECK-NOT: ld.global.nc.v2 +)"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 94a4df43cf4..32a9038b15a 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -707,6 +707,10 @@ Status HloCostAnalysis::HandleCholesky(const HloInstruction* hlo) { return Status::OK(); } +Status HloCostAnalysis::HandleAllGather(const HloInstruction* hlo) { + return Status::OK(); +} + Status HloCostAnalysis::HandleAllReduce(const HloInstruction* crs) { // We assume 2 replicas, so that each output element is the sum of two input // elements. diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 915c4dcbe84..9fdb42185fb 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -76,6 +76,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { Status HandleFft(const HloInstruction* fft) override; Status HandleTriangularSolve(const HloInstruction* hlo) override; Status HandleCholesky(const HloInstruction* hlo) override; + Status HandleAllGather(const HloInstruction* hlo) override; Status HandleAllReduce(const HloInstruction* crs) override; Status HandleAllToAll(const HloInstruction* hlo) override; Status HandleCollectivePermute(const HloInstruction* hlo) override; diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc index b8e3f83b515..900b557b4dc 100644 --- a/tensorflow/compiler/xla/service/hlo_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_dce.cc @@ -47,16 +47,14 @@ StatusOr HloDCE::RunOnComputation( // computation's instruction while simultaneously removing instructions. 
std::vector dead_roots; for (auto* instruction : computation->instructions()) { + auto maybe_collective_op = DynCast(instruction); if (instruction != computation->root_instruction() && instruction->user_count() == 0 && computation->IsSafelyRemovable(instruction) && (!instruction->HasSideEffect() || (remove_cross_partition_collective_ops && - ((instruction->opcode() == HloOpcode::kAllReduce && - !Cast(instruction)->constrain_layout()) || - (instruction->opcode() == HloOpcode::kAllToAll && - !Cast(instruction)->constrain_layout()) || - instruction->opcode() == HloOpcode::kCollectivePermute)))) { + (maybe_collective_op != nullptr && + !maybe_collective_op->constrain_layout())))) { dead_roots.push_back(instruction); } } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index e105ea8ce18..3dc9cc24734 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -700,6 +700,38 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleCbrt(HloInstruction* cbrt) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[cbrt], + ElementWiseUnaryOp(cbrt, [](ElementwiseT elem_operand) -> ElementwiseT { + return std::pow(elem_operand, static_cast(1.0 / 3.0)); + return elem_operand.real() < 0 + ? -std::pow(-elem_operand, + static_cast(1.0 / 3.0)) + : std::pow(elem_operand, + static_cast(1.0 / 3.0)); + })); + return Status::OK(); + } + + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleCbrt(HloInstruction* cbrt) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[cbrt], + ElementWiseUnaryOp(cbrt, [](ElementwiseT elem_operand) { + return std::cbrt(elem_operand); + })); + return Status::OK(); + } + + Status HandleCbrt(HloInstruction* cbrt) override { + return HandleCbrt(cbrt); + } + Status HandleRsqrt(HloInstruction* rsqrt) override { TF_ASSIGN_OR_RETURN( parent_->evaluated_[rsqrt], diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 78e4d39d3fe..cd2a61d7eff 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -980,6 +980,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kSlice: case HloOpcode::kSort: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kSubtract: case HloOpcode::kTanh: // De-emphasize scalar-shaped elementwise ops -- they're generally @@ -1056,6 +1057,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kGetDimensionSize: case HloOpcode::kSetDimensionSize: return kGray; + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 5fc42eb5e3c..9e9c8b0913b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -388,6 +388,24 @@ StatusOr> HloInstruction::CreateFromProto( proto.outfeed_config()); break; } + case HloOpcode::kAllGather: { + absl::optional channel_id; + if (proto.channel_id() > 0) { + channel_id = proto.channel_id(); + } + + 
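      // All-gather concatenates one shard from each participant along the
      // single all_gather_dimension, so the output extent in that dimension
      // is the operand extent times the gather-group size. For example
      // (shapes taken from the parser tests added in this change), gathering
      // an f32[128,32] operand along dimension 1 across 4 participants yields
      // f32[128,128], while with replica_groups={{0,1},{2,3}} each group of 2
      // yields f32[128,64].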
TF_RET_CHECK(proto.dimensions_size() == 1) + << "AllGather cannot have more than 1 all-gather dimensions"; + TF_RET_CHECK(all_operands().size() == 1) + << "AllGather must have a single operand"; + int64 all_gather_dimension = proto.dimensions(0); + instruction = CreateAllGather( + shape, operands(0), all_gather_dimension, + std::vector(proto.replica_groups().begin(), + proto.replica_groups().end()), + proto.constrain_layout(), channel_id, proto.use_global_device_ids()); + break; + } case HloOpcode::kAllReduce: { TF_RET_CHECK(proto.called_computation_ids_size() == 1) << "AllReduce should have 1 called computation but sees " @@ -807,6 +825,7 @@ HloInstruction::CreateRngBitGenerator(const Shape& shape, HloInstruction* state, case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: break; default: @@ -928,6 +947,15 @@ HloInstruction::CreateReducePrecision(const Shape& shape, shape, operand, exponent_bits, mantissa_bits); } +/* static */ std::unique_ptr HloInstruction::CreateAllGather( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids) { + return absl::make_unique( + shape, operand, all_gather_dimension, replica_groups, constrain_layout, + channel_id, use_global_device_ids); +} + /* static */ std::unique_ptr HloInstruction::CreateAllReduce( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, @@ -1517,6 +1545,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kParameter: case HloOpcode::kGetTupleElement: case HloOpcode::kReducePrecision: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: @@ -1565,6 +1594,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: CHECK_EQ(new_operands.size(), 1); clone = CreateUnary(shape, opcode_, new_operands[0]); @@ -1937,6 +1967,7 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kSubtract: case HloOpcode::kTanh: case HloOpcode::kTuple: @@ -1994,6 +2025,7 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kReducePrecision: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: @@ -2381,6 +2413,7 @@ bool HloInstruction::IsElementwiseImpl( case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: CHECK_EQ(1, operand_count()); return true; @@ -2847,6 +2880,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleConvolution(this); case HloOpcode::kFft: return visitor->HandleFft(this); + case HloOpcode::kAllGather: + return visitor->HandleAllGather(this); case HloOpcode::kAllReduce: return visitor->HandleAllReduce(this); case HloOpcode::kAllToAll: @@ -2893,6 +2928,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { return visitor->HandleSin(this); case HloOpcode::kSqrt: return visitor->HandleSqrt(this); + case HloOpcode::kCbrt: + return visitor->HandleCbrt(this); case HloOpcode::kRsqrt: return visitor->HandleRsqrt(this); case HloOpcode::kReal: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h 
b/tensorflow/compiler/xla/service/hlo_instruction.h index 3547de0f5e3..8be7a034877 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -618,6 +618,16 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, const int exponent_bits, const int mantissa_bits); + // Creates an all-gather op, which concats the operands of all participants + // along all_gather_dimension. The replica_groups, channel_id, and + // use_global_device_ids arguments are identical to those in all-reduce, + // except that the order of the group members determines the concatenation + // order of inputs from different participants. + static std::unique_ptr CreateAllGather( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids); + // Creates a cross replica reduction op. // // `reduction_computation`: the reduction function. @@ -1605,6 +1615,9 @@ class HloInstruction { virtual int64 dimensions(int64 index) const { LOG(FATAL) << "Unimplemented method."; } + virtual std::vector* mutable_dimensions() { + LOG(FATAL) << "Unimplemented method."; + } // Delegates to HloConcatenateInstruction::concatenate_dimension. int64 concatenate_dimension() const; diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index eb821d40e78..d5bdd674563 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -556,6 +556,51 @@ bool HloCollectiveInstruction::IdenticalSlowPath( }); } +HloAllGatherInstruction::HloAllGatherInstruction( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids) + : HloCollectiveInstruction(HloOpcode::kAllGather, shape, {operand}, + replica_groups, constrain_layout, channel_id), + all_gather_dimension_(all_gather_dimension), + use_global_device_ids_(use_global_device_ids) {} + +std::vector HloAllGatherInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + std::vector result = + HloCollectiveInstruction::ExtraAttributesToStringImpl(options); + result.push_back(StrCat("dimensions={", all_gather_dimension_, "}")); + if (use_global_device_ids_) { + result.push_back("use_global_device_ids=true"); + } + return result; +} + +std::unique_ptr +HloAllGatherInstruction::CloneWithNewOperandsImpl( + const Shape& shape, absl::Span new_operands, + HloCloneContext* /*context*/) const { + return absl::make_unique( + shape, new_operands[0], all_gather_dimension(), replica_groups(), + constrain_layout(), channel_id(), use_global_device_ids()); +} + +HloInstructionProto HloAllGatherInstruction::ToProto() const { + HloInstructionProto proto = HloCollectiveInstruction::ToProto(); + proto.add_dimensions(all_gather_dimension_); + return proto; +} + +bool HloAllGatherInstruction::IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const { + const auto& casted_other = static_cast(other); + return HloCollectiveInstruction::IdenticalSlowPath(other, eq_computations) && + all_gather_dimension_ == casted_other.all_gather_dimension() && + use_global_device_ids() == casted_other.use_global_device_ids(); +} + HloAllReduceInstruction::HloAllReduceInstruction( const Shape& shape, absl::Span 
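// A usage sketch for CreateAllGather above (hypothetical values; only the
// factory and its documented semantics come from this change): with an
// f32[4] operand per participant, all_gather_dimension=0 and a single replica
// group listing participants as {1, 0}, the result shape is f32[8] and, per
// the comment on CreateAllGather, participant 1's data is concatenated ahead
// of participant 0's.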
operands, HloComputation* reduce_computation, diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 3b1916e9486..ae78d365cfa 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -348,6 +348,38 @@ class HloCollectiveInstruction : public HloChannelInstruction { bool constrain_layout_; }; +class HloAllGatherInstruction : public HloCollectiveInstruction { + public: + explicit HloAllGatherInstruction( + const Shape& shape, HloInstruction* operand, int64 all_gather_dimension, + const std::vector& replica_groups, bool constrain_layout, + const absl::optional& channel_id, bool use_global_device_ids); + // Same as HloAllReduceInstruction::use_global_device_ids. + bool use_global_device_ids() const { return use_global_device_ids_; } + + // The dimension on which data from different participants are concatenated. + int64 all_gather_dimension() const { return all_gather_dimension_; } + + protected: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + HloInstructionProto ToProto() const override; + + private: + bool IdenticalSlowPath( + const HloInstruction& other, + const std::function& + eq_computations) const override; + + // Implementation for non-common logic of CloneWithNewOperands. + std::unique_ptr CloneWithNewOperandsImpl( + const Shape& shape, absl::Span new_operands, + HloCloneContext* context) const override; + + int64 all_gather_dimension_; + bool use_global_device_ids_; +}; + class HloAllReduceInstruction : public HloCollectiveInstruction { public: explicit HloAllReduceInstruction( @@ -465,6 +497,7 @@ class HloReverseInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; @@ -491,6 +524,7 @@ class HloConcatenateInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Accessor for the dimension in which a concatenate HLO should occur. int64 concatenate_dimension() const { return dimensions(0); } // Returns a serialized representation of this instruction. @@ -520,6 +554,7 @@ class HloReduceInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; @@ -560,6 +595,7 @@ class HloSortInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. 
const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Returns the sort dimension for this instruction int64 sort_dimension() const { return dimensions(0); } // Returns a serialized representation of this instruction. @@ -594,6 +630,7 @@ class HloTransposeInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Returns whether this instruction does a rank-2 transposition. bool IsRank2Transpose() const; // Returns a serialized representation of this instruction. @@ -621,6 +658,7 @@ class HloBroadcastInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() override { return &dimensions_; } // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; @@ -668,6 +706,7 @@ class HloMapInstruction : public HloInstruction { // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } + std::vector* mutable_dimensions() { return &dimensions_; } // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index de65ed99303..9722d5c2b76 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -420,6 +420,8 @@ StatusOr HloModule::CreateModuleConfigFromShape( if (execution_options->num_partitions() > 0) { module_config.set_num_partitions(execution_options->num_partitions()); } + module_config.set_use_spmd_partitioning( + execution_options->use_spmd_partitioning()); if (execution_options->has_device_assignment()) { TF_ASSIGN_OR_RETURN(std::unique_ptr device_assignment, DeviceAssignment::Deserialize( diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index b31a9ae6ca5..833d0fe59d0 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -128,6 +128,11 @@ class HloModuleConfig { } int64 num_partitions() const { return num_partitions_; } + void set_use_spmd_partitioning(bool use_spmd_partitioning) { + use_spmd_partitioning_ = use_spmd_partitioning; + } + bool use_spmd_partitioning() const { return use_spmd_partitioning_; } + // Return a string which unambiguously represents all the fields of this data // structure. Used for generating a cache key for storing the compiled // executable. 
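// How the new use_spmd_partitioning knob is plumbed (a minimal sketch; the
// ExecutionOptions setters are the standard protobuf-generated accessors and
// the helper below is assumed, not part of this change):
//
//   ExecutionOptions execution_options = CreateDefaultExecutionOptions();
//   execution_options.set_num_partitions(8);
//   execution_options.set_use_spmd_partitioning(true);
//
// HloModule::CreateModuleConfigFromShape (changed above) then copies the flag
// into HloModuleConfig, where use_spmd_partitioning() selects SPMD (true)
// rather than MPMD (false) when XLA needs to partition the module.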
@@ -199,6 +204,14 @@ class HloModuleConfig { std::vector>* mutable_dot_config() { return &dot_config_; } + absl::Span>> layout_config() const { + return layout_config_; + } + + std::vector>>* mutable_layout_config() { + return &layout_config_; + } + private: // If you add new members, be sure to update compilation_cache_key. @@ -216,6 +229,10 @@ class HloModuleConfig { // The number of partitions (model parallelism) to compile this binary for. int64 num_partitions_ = 1; + // Whether to use SPMD (true) or MPMD (false) when num_partitions_ > 0 and XLA + // needs to partition the module. + bool use_spmd_partitioning_ = false; + // The target maximum parallelism at which to partition HLOs for parallel // execution on the CPU backend. int64 intra_op_parallelism_threads_ = -1; @@ -232,6 +249,9 @@ class HloModuleConfig { FusionConfigCollection fusion_config_collection_ = FusionConfigCollection::kOff; + // TODO(b/155665133): Consolidate fusion, dot, and layout config into a proto + // similar to backend config. + // Custom fusion configuration, where fusion_config_[c][v] control if node v // in computation c must be fused to all its consumers (true) or not (false). std::vector> fusion_config_; @@ -240,6 +260,10 @@ class HloModuleConfig { // how to convert dot operation v (sorted topologically and by computation) to // convolution. std::vector> dot_config_; + + // Layout configuration, where layout_config_[v][i] controls the layout + // decision i of operation v. + std::vector>> layout_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index dfe68d93f30..664fa10a990 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -48,6 +48,7 @@ namespace xla { V(kAdd, "add", 2) \ V(kAddDependency, "add-dependency", 2) \ V(kAfterAll, "after-all", kHloOpcodeIsVariadic) \ + V(kAllGather, "all-gather", 1) \ V(kAllReduce, "all-reduce", kHloOpcodeIsVariadic) \ V(kAllToAll, "all-to-all", kHloOpcodeIsVariadic) \ V(kAtan2, "atan2", 2) \ @@ -138,6 +139,7 @@ namespace xla { V(kSlice, "slice", 1) \ V(kSort, "sort", kHloOpcodeIsVariadic) \ V(kSqrt, "sqrt", 1) \ + V(kCbrt, "cbrt", 1) \ V(kSubtract, "subtract", 2) \ V(kTanh, "tanh", 1) \ V(kTrace, "trace", 1) \ diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 4162c5d62d5..2a90c95850c 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -784,6 +784,7 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: { if (!ParseOperands(&operands, /*expected_size=*/1) || !ParseAttributes(attrs)) { @@ -849,6 +850,35 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, HloInstruction::CreateBitcastConvert(shape, operands[0])); break; } + case HloOpcode::kAllGather: { + optional>> tmp_groups; + optional> replica_group_ids; + optional channel_id; + optional> dimensions; + optional constrain_layout; + optional use_global_device_ids; + attrs["replica_groups"] = {/*required=*/false, + AttrTy::kBracedInt64ListList, &tmp_groups}; + attrs["channel_id"] = {/*required=*/false, AttrTy::kInt64, &channel_id}; + attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, + &dimensions}; + attrs["constrain_layout"] = {/*required=*/false, AttrTy::kBool, + &constrain_layout}; 
+ attrs["use_global_device_ids"] = {/*required=*/false, AttrTy::kBool, + &use_global_device_ids}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + std::vector replica_groups; + if (tmp_groups) { + replica_groups = CreateReplicaGroups(*tmp_groups); + } + instruction = builder->AddInstruction(HloInstruction::CreateAllGather( + shape, operands[0], dimensions->at(0), replica_groups, + constrain_layout ? *constrain_layout : false, channel_id, + use_global_device_ids ? *use_global_device_ids : false)); + break; + } case HloOpcode::kAllReduce: { optional>> tmp_groups; optional to_apply; diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 7e66b4e648d..e18014a3071 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1480,6 +1480,43 @@ ENTRY CRS { )" }, +// all-gather +{ +"AllGather", +R"(HloModule AllGather + +ENTRY AllGather { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,128]{0,1} all-gather(input), replica_groups={}, dimensions={1} +} + +)" +}, +// all-gather with constrained layout +{ +"AllGatherWithLayout", +R"(HloModule AllGather + +ENTRY AllGather { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,128]{0,1} all-gather(input), replica_groups={}, constrain_layout=true, dimensions={1} +} + +)" +}, +// all-gather with subgroups +{ +"AllGatherWithSubgroups", +R"(HloModule AllGatherWithSubgroups + +ENTRY AllGatherWithSubgroups { + input = f32[128,32]{0,1} parameter(0) + ROOT ag = f32[128,64]{0,1} all-gather(input), replica_groups={{0,1},{2,3}}, dimensions={1} +} + +)", +/*replica_count=*/4, +}, // all-to-all { "AllToAll", diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.cc b/tensorflow/compiler/xla/service/hlo_sharding_util.cc new file mode 100644 index 00000000000..129091ca06f --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.cc @@ -0,0 +1,574 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_sharding_util.h" + +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/array.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { +namespace hlo_sharding_util { + +absl::optional SelectDominantDevice( + const std::map& device_map, int64* top_count) { + int64 device = 0; + int64 count = 0; + for (auto& it : device_map) { + if (it.second > count) { + count = it.second; + device = it.first; + } + } + if (top_count != nullptr) { + *top_count = count; + } + return count > 0 ? 
absl::optional(device) : absl::optional(); +} + +Status AssignComputationDevice(HloComputation* computation, int64 device) { + VLOG(4) << "Assigning device " << device << " to " << computation->name() + << " computation"; + for (HloInstruction* instruction : computation->instructions()) { + if (!instruction->has_sharding()) { + VLOG(4) << "Assigning device " << device << " to " << instruction->name(); + instruction->set_device_sharding(device); + } + } + return Status::OK(); +} + +absl::optional GetMostOccurringDevice( + absl::Span instructions) { + std::map device_map; + for (HloInstruction* instruction : instructions) { + if (instruction->has_sharding()) { + for (auto& it : instruction->sharding().UsedDevices(nullptr)) { + // The UsedDevices() API returns a map. + device_map[it.first] += it.second; + } + } + } + return SelectDominantDevice(device_map, nullptr); +} + +StatusOr> GetDominantDevice( + absl::Span computations, double dominant_factor) { + int64 instruction_count = 0; + std::map device_map; + for (HloComputation* computation : computations) { + for (HloInstruction* instruction : computation->instructions()) { + int64 count = 1; + if (instruction->has_sharding()) { + for (auto& it : instruction->sharding().UsedDevices(&count)) { + // The UsedDevices() API returns a map. + device_map[it.first] += it.second; + } + } + instruction_count += count; + } + } + int64 count; + absl::optional device = SelectDominantDevice(device_map, &count); + absl::optional dominant_device; + if (device) { + double factor = + static_cast(count) / static_cast(instruction_count); + if (factor >= dominant_factor) { + dominant_device = device; + } + } + return dominant_device; +} + +HloSharding TransposeSharding(const HloSharding& sharding, + const std::vector& dimensions) { + if (sharding.IsTileMaximal()) { + return sharding; + } + const int64 rank = dimensions.size(); + std::vector tile_assignment_dim(rank); + for (int64 i = 0; i < rank; ++i) { + tile_assignment_dim[i] = sharding.tile_assignment().dim(dimensions[i]); + } + Array tile_assignment = sharding.tile_assignment(); + tile_assignment.Reshape(tile_assignment_dim); + tile_assignment.Each([&](absl::Span indices, int64* value) { + std::vector src_indices(indices.size(), -1); + for (int64 i = 0; i < indices.size(); ++i) { + src_indices[dimensions[i]] = indices[i]; + } + *value = sharding.tile_assignment()(src_indices); + }); + return HloSharding::Tile(tile_assignment); +} + +absl::optional ReshapeSharding(const Shape& source_shape, + const Shape& target_shape, + const HloSharding& sharding) { + if (sharding.IsTileMaximal()) { + return sharding; + } + + // In case of a tiled sharding the reshaped sharding will be a valid if the + // reshape is composed from the following operations: + // * Adding or removing dimensions with size 1. + // * Merging consecutive dimensions where only the most major is sharded. + // * Splitting a dimension to consecutive dimensions. + // * Any reshaping of unsharded dimensions. + // Note that merge and split can happen consecutively on the same dimension, + // e.g., f32[1024,256,1024] to f32[128,2048,1024] can be considered that 1024 + // gets split into 128 and 8, but 8 then gets merged with 256. We use stacks + // to make supporting such cases easy. 
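+  // Worked example (illustrative note, values taken from the comment above):
+  // with tile assignment [2,1,1] (devices {0,1} splitting the major dimension
+  // of f32[1024,256,1024]), the split 1024 -> 128x8 keeps the shard on the
+  // new major dimension, and the remaining merge of 8 with 256 involves only
+  // unsharded dimensions, so the reshaped sharding on f32[128,2048,1024] is
+  // again [2,1,1].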
+ const Shape tile_shape = sharding.TileShape(source_shape); + std::vector target_tile_assignment_dimensions; + std::vector source_dims_stack(source_shape.rank()); + std::vector target_dims_stack(target_shape.rank()); + std::vector sharding_tile_dims_stack(source_shape.rank()); + for (int64 i = 0; i < source_shape.rank(); ++i) { + source_dims_stack[i] = source_shape.dimensions(source_shape.rank() - 1 - i); + sharding_tile_dims_stack[i] = + sharding.tile_assignment().dim(source_shape.rank() - 1 - i); + } + for (int64 i = 0; i < target_shape.rank(); ++i) { + target_dims_stack[i] = target_shape.dimensions(target_shape.rank() - 1 - i); + } + while (!source_dims_stack.empty() || !target_dims_stack.empty()) { + if (target_dims_stack.empty()) { + if (Product(sharding_tile_dims_stack) != 1) { + return absl::nullopt; + } + break; + } + int64 s_size = 1; + int64 t_size = 1; + int64 s_partitions = 1; + if (!source_dims_stack.empty()) { + s_size = source_dims_stack.back(); + source_dims_stack.pop_back(); + s_partitions = sharding_tile_dims_stack.back(); + sharding_tile_dims_stack.pop_back(); + } + t_size = target_dims_stack.back(); + target_dims_stack.pop_back(); + if (s_partitions * Product(sharding_tile_dims_stack) == 1) { + // No more partitions left. + target_tile_assignment_dimensions.push_back(1); + continue; + } + if (s_size == t_size) { + // Same dimension. + target_tile_assignment_dimensions.push_back(s_partitions); + } else if (t_size == 1) { + // Trivial dimension added. + target_tile_assignment_dimensions.push_back(1); + source_dims_stack.push_back(s_size); + sharding_tile_dims_stack.push_back(s_partitions); + } else if (s_size == 1) { + // Trivial dimension removed. + if (s_partitions != 1) { + return absl::nullopt; + } + target_dims_stack.push_back(t_size); + } else if (s_size > t_size) { + // Dimension split. + if (s_size % t_size != 0 || t_size % s_partitions != 0) { + return absl::nullopt; + } + target_tile_assignment_dimensions.push_back(s_partitions); + // We have part of the s_size unprocessed, so put it back to stack. + source_dims_stack.push_back(s_size / t_size); + sharding_tile_dims_stack.push_back(1); + } else { + // Dimension merge. Also merge the source dimension with the next, and + // process it next time. + if (s_size % s_partitions != 0) { + return absl::nullopt; + } + CHECK(!source_dims_stack.empty()); + if (sharding_tile_dims_stack.back() != 1 && s_size != s_partitions) { + // If the next dimension to combine is sharded, we require that the + // current dimension's shard size to be 1. Otherwise, the new shard + // would be non-contiguous. 
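+        // (Illustrative case, not from the original comment: merging f32[4,4]
+        // tiled [1,2] into f32[16] would give each device a strided set of
+        // elements rather than one contiguous block, so we give up here.)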
+ return absl::nullopt; + } + source_dims_stack.back() *= s_size; + sharding_tile_dims_stack.back() *= s_partitions; + target_dims_stack.push_back(t_size); + } + } + Array new_tile_assignment = sharding.tile_assignment(); + new_tile_assignment.Reshape(target_tile_assignment_dimensions); + return HloSharding::Tile(new_tile_assignment); +} + +HloSharding ReshapeToTileDimension(const HloSharding& sharding, int64 dim, + absl::Span dims) { + CHECK(!sharding.IsTuple() && !sharding.IsTileMaximal()); + CHECK_NE(absl::c_find(dims, dim), dims.end()) << "dim is not in dims"; + // We optimize the tile assignment on the single dimension dim in a way to + // minimize communication among devices caused by the reshard: + // +---+---+ +---+---+ +-+-+-+-+ + // | | | | 0 | | | | | | + // | 0 | 1 | +-------+ | | | | | + // | | | reshape on | 1 | reshape on | | | | | + // +---+---+ dim 0 => +-------+ dim 1 => |0|2|1|3| + // | | | | 2 | | | | | | + // | 2 | 3 | +-------+ | | | | | + // | | | | 3 | | | | | | + // +---+---+ +---+---+ +-+-+-+-+ + + std::vector tile_dims(sharding.tile_assignment().num_dimensions(), 1); + // Handle ignore dimensions. + std::vector ignore_sizes; + int64 ignore_size = 1; + for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { + if (absl::c_find(dims, i) == dims.end()) { + int64 size = sharding.tile_assignment().dim(i); + ignore_sizes.push_back(size); + tile_dims[i] = size; + ignore_size *= size; + } + } + + using Buckets = std::vector>; + Array buckets(ignore_sizes, + Buckets(sharding.tile_assignment().dim(dim))); + sharding.tile_assignment().Each( + [&](absl::Span index, int64 device) { + std::vector ignore_index; + for (int64 i = 0; i < index.size(); ++i) { + if (absl::c_find(dims, i) == dims.end()) { + ignore_index.push_back(index[i]); + } + } + buckets(ignore_index)[index[dim]].push_back(device); + }); + std::vector devices; + buckets.Each([&](absl::Span index, const Buckets& buckets) { + for (auto& bucket : buckets) { + devices.insert(devices.end(), bucket.begin(), bucket.end()); + } + }); + tile_dims[dim] = devices.size() / ignore_size; + Array tile_assignment(tile_dims); + tile_assignment.SetValues(devices); + return HloSharding::Tile(tile_assignment); +} + +bool ContainsTileSharding(const HloModule& module) { + for (const HloComputation* computation : module.computations()) { + for (const HloInstruction* instruction : computation->instructions()) { + if (instruction->has_sharding() && + !instruction->sharding().IsTileMaximal()) { + return true; + } + } + } + return false; +} + +HloSharding GatherOutputSharding(const HloSharding& index_sharding, + const HloInstruction* hlo) { + if (index_sharding.IsTileMaximal()) { + return index_sharding; + } + + const GatherDimensionNumbers& dnums = hlo->gather_dimension_numbers(); + std::vector output_tile_assignment_dims; + for (int64 i = 0, index_dim = 0; i < hlo->shape().rank(); ++i) { + if (absl::c_binary_search(dnums.offset_dims(), i)) { + output_tile_assignment_dims.push_back(1); + } else { + output_tile_assignment_dims.push_back( + index_sharding.tile_assignment().dim(index_dim)); + index_dim++; + } + } + Array new_tile_assignment = index_sharding.tile_assignment(); + new_tile_assignment.Reshape(output_tile_assignment_dims); + return HloSharding::Tile(new_tile_assignment); +} + +HloSharding GatherIndexSharding(const HloSharding& output_sharding, + const HloInstruction* hlo) { + if (output_sharding.IsTileMaximal()) { + return output_sharding; + } + + const GatherDimensionNumbers& dnums = 
hlo->gather_dimension_numbers(); + std::vector index_tile_assignment_dims; + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + if (!absl::c_binary_search(dnums.offset_dims(), i)) { + index_tile_assignment_dims.push_back( + output_sharding.tile_assignment().dim(i)); + } + } + Array new_tile_assignment = output_sharding.tile_assignment(); + new_tile_assignment.Reshape(index_tile_assignment_dims); + return HloSharding::Tile(new_tile_assignment); +} + +HloSharding GatherEffectiveOutputSharding(const HloInstruction& hlo) { + if (hlo.sharding().IsTileMaximal()) { + return hlo.sharding(); + } + + const GatherDimensionNumbers& dnums = hlo.gather_dimension_numbers(); + std::vector tile_assignment_dims(hlo.shape().rank()); + int64 num_elements = 1; + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (!absl::c_binary_search(dnums.offset_dims(), i)) { + tile_assignment_dims[i] = hlo.sharding().tile_assignment().dim(i); + num_elements *= hlo.sharding().tile_assignment().dim(i); + } else { + tile_assignment_dims[i] = 1; + } + } + if (num_elements == hlo.sharding().tile_assignment().num_elements()) { + // Output sharding is only on non offset dimensions. We use output sharding + // to shard this gather op directly. + return hlo.sharding(); + } + + if (num_elements == 1) { + // Output sharding is only on offset dimensions. We do not shard this gather + // op. Return a tile maximal sharding with the first device in output + // sharding tile assignment. + return HloSharding::AssignDevice(*hlo.sharding().tile_assignment().begin()); + } + + // Output sharding is on both offset and non offset dimensions. We shard the + // gather op only on non offset dimensions. + // For example: + // - the gather op has sharding [2,2]{0,1,2,3}, + // - first dimension is non offset dimension, + // - second dimension is offset dimension, + // Then the result sharding will be [2,1]{0,2}. 
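+  // (Illustrative note: the slice below keeps the full tile range on non
+  // offset dimensions and only index 0 on offset dimensions, so in the
+  // example above the [2,2] assignment {{0,1},{2,3}} is sliced to {{0},{2}}.)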
+ std::vector slice_starts(hlo.shape().rank(), 0LL), + slice_limits(hlo.shape().rank()); + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (!absl::c_binary_search(dnums.offset_dims(), i)) { + slice_limits[i] = hlo.sharding().tile_assignment().dim(i); + } else { + slice_limits[i] = 1; + } + } + Array tile_assignment = + hlo.sharding().tile_assignment().Slice(slice_starts, slice_limits); + return HloSharding::Tile(tile_assignment); +} + +HloSharding ScatterIndexSharding(const HloSharding& data_sharding, + const HloInstruction* hlo) { + if (data_sharding.IsTileMaximal()) { + return data_sharding; + } + + const ScatterDimensionNumbers& dnums = hlo->scatter_dimension_numbers(); + std::vector index_tile_assignment_dims; + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + if (!absl::c_binary_search(dnums.update_window_dims(), i)) { + index_tile_assignment_dims.push_back( + data_sharding.tile_assignment().dim(i)); + } + } + if (index_tile_assignment_dims.size() < hlo->operand(1)->shape().rank()) { + index_tile_assignment_dims.push_back(1); + } + Array new_tile_assignment = data_sharding.tile_assignment(); + new_tile_assignment.Reshape(index_tile_assignment_dims); + return HloSharding::Tile(new_tile_assignment); +} + +HloSharding ScatterDataSharding(const HloSharding& index_sharding, + const HloInstruction* hlo) { + if (index_sharding.IsTileMaximal()) { + return index_sharding; + } + + const ScatterDimensionNumbers& dnums = hlo->scatter_dimension_numbers(); + std::vector data_tile_assignment_dims; + for (int64 i = 0, index_dim = 0; i < hlo->shape().rank(); ++i) { + if (absl::c_binary_search(dnums.update_window_dims(), i)) { + data_tile_assignment_dims.push_back(1); + } else { + data_tile_assignment_dims.push_back( + index_sharding.tile_assignment().dim(index_dim)); + index_dim++; + } + } + Array new_tile_assignment = index_sharding.tile_assignment(); + new_tile_assignment.Reshape(data_tile_assignment_dims); + return HloSharding::Tile(new_tile_assignment); +} + +HloSharding ScatterEffectiveIndexSharding(const HloSharding& index_sharding, + const HloInstruction& hlo) { + if (index_sharding.IsTileMaximal()) { + return index_sharding; + } + + // Only shard on first "number of scatter_window_dims" dimensions. + const ScatterDimensionNumbers& dnums = hlo.scatter_dimension_numbers(); + int64 num_elements = 1; + int64 index_dim = 0; + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (absl::c_binary_search(dnums.inserted_window_dims(), i)) { + num_elements *= index_sharding.tile_assignment().dim(index_dim); + index_dim++; + } + } + if (num_elements == index_sharding.tile_assignment().num_elements()) { + // Index sharding is only on scatter_window_dims. We use this index sharding + // directly. + return index_sharding; + } + + // Index sharding is only on update_window_dims. We do not shard this scatter + // op. Return a tile maximal sharding with the first device in index sharding + // tile assignment. 
+ if (num_elements == 1) { + return HloSharding::AssignDevice(*index_sharding.tile_assignment().begin()); + } + + const int64 index_rank = hlo.operand(1)->shape().rank(); + std::vector slice_starts(index_rank, 0LL), slice_limits(index_rank); + for (int64 i = 0; i < index_rank; ++i) { + if (i < index_dim) { + slice_limits[i] = index_sharding.tile_assignment().dim(i); + } else { + slice_limits[i] = 1; + } + } + Array tile_assignment = + index_sharding.tile_assignment().Slice(slice_starts, slice_limits); + return HloSharding::Tile(tile_assignment); +} + +HloSharding ScatterEffectiveDataSharding(const HloSharding& data_sharding, + const HloInstruction& hlo) { + if (data_sharding.IsTileMaximal()) { + return data_sharding; + } + + const ScatterDimensionNumbers& dnums = hlo.scatter_dimension_numbers(); + const int64 data_rank = hlo.operand(2)->shape().rank(); + std::vector tile_assignment_dims(data_rank, 1LL); + int64 num_elements = 1; + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (absl::c_binary_search(dnums.inserted_window_dims(), i)) { + CHECK_LT(i, data_rank); + tile_assignment_dims[i] = data_sharding.tile_assignment().dim(i); + num_elements *= data_sharding.tile_assignment().dim(i); + } + } + if (num_elements == data_sharding.tile_assignment().num_elements()) { + // Data sharding is only on scatter_window_dims. We use this data sharding + // directly. + return data_sharding; + } + + if (num_elements == 1) { + // Data sharding is only on update_window_dims. We do not shard this + // scatter op. Return a tile maximal sharding with the first device in + // data sharding tile assignment. + return HloSharding::AssignDevice(*data_sharding.tile_assignment().begin()); + } + + // Data sharding is on both update_window_dims and scatter_window_dims. We + // shard the scatter op only on scatter_window_dims. For example: + // - the scatter data has sharding [2,2]{0,1,2,3}, + // - first dimension is scatter_window_dims, + // - second dimension is update_window_dims, + // Then the result sharding will be [2,1]{0,2}. + std::vector slice_starts(data_rank, 0LL); + Array tile_assignment = + data_sharding.tile_assignment().Slice(slice_starts, tile_assignment_dims); + return HloSharding::Tile(tile_assignment); +} + +StatusOr, HloOpcode>> +IdentityValueAndHloOpcodeForScatterReduceComputation( + const HloScatterInstruction& scatter) { + auto computation = scatter.to_apply(); + // We only handle computations with 2 parameters and only 1 calculation. 
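+  // (Illustrative note: the 2 parameters plus the single root op give an
+  // instruction count of 3, e.g.
+  //   add { p0 = f32[] parameter(0)  p1 = f32[] parameter(1)
+  //         ROOT sum = f32[] add(p0, p1) })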
+ if (computation->instruction_count() != 3) { + return Status( + tensorflow::error::Code::INVALID_ARGUMENT, + "Expected scatter reduce computation with 2 parameters and only 1 " + "calculation"); + } + + auto root_instruction = computation->root_instruction(); + if (root_instruction->opcode() == HloOpcode::kAdd || + root_instruction->opcode() == HloOpcode::kOr) { + return std::make_pair(HloInstruction::CreateConstant(LiteralUtil::Zero( + scatter.shape().element_type())), + root_instruction->opcode()); + } else if (root_instruction->opcode() == HloOpcode::kMultiply || + root_instruction->opcode() == HloOpcode::kAnd) { + return std::make_pair(HloInstruction::CreateConstant( + LiteralUtil::One(scatter.shape().element_type())), + root_instruction->opcode()); + } else if (root_instruction->opcode() == HloOpcode::kMaximum) { + return std::make_pair(HloInstruction::CreateConstant(LiteralUtil::MinValue( + scatter.shape().element_type())), + root_instruction->opcode()); + } else if (root_instruction->opcode() == HloOpcode::kMinimum) { + return std::make_pair(HloInstruction::CreateConstant(LiteralUtil::MaxValue( + scatter.shape().element_type())), + root_instruction->opcode()); + } + + return Status(tensorflow::error::Code::INVALID_ARGUMENT, + "Expected scatter reduce computation which is " + "add/or/multiply/add/min/max"); +} + +std::vector DevicesForSharding( + const HloSharding& sharding, const std::vector& available_devices) { + std::vector devices; + if (sharding.IsReplicated()) { + for (int64 d : available_devices) { + if (!HloSharding::IsReservedDevice(d)) { + devices.push_back(d); + } + } + return devices; + } + + for (int64 i : available_devices) { + if (sharding.UsesDevice(i)) { + devices.push_back(i); + } + } + DCHECK(std::all_of(sharding.tile_assignment().begin(), + sharding.tile_assignment().end(), [&](int64 device) { + return std::find(available_devices.begin(), + available_devices.end(), + device) != available_devices.end(); + })); + return devices; +} + +} // namespace hlo_sharding_util +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.h b/tensorflow/compiler/xla/service/hlo_sharding_util.h new file mode 100644 index 00000000000..00d9434a34d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.h @@ -0,0 +1,143 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_SHARDING_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_SHARDING_UTIL_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" + +namespace xla { +namespace hlo_sharding_util { + +// Given a map, selects the device with higher +// occurrence count (if any). If top_count in not nullptr, it will receive the +// count of the dominant device returned. +absl::optional SelectDominantDevice( + const std::map& device_map, int64* top_count); + +// Assigns all the instructions of a computation, to a given device. +// This API does not recurse into called computations, and does not assign +// instructions which already have sharding. +Status AssignComputationDevice(HloComputation* computation, int64 device); + +// Given an instruction container, returns the device which is most commonly +// occurring among the instructions. +absl::optional GetMostOccurringDevice( + absl::Span instructions); + +// Given a set of computations, tries to extract the dominant device. A device +// is dominant if the combined occurrence among all the instructions of the +// input computations, is greater/equal than/to dominant_factor (real number +// from 0 to 1). +// This API does not recurse into called computations. +// If no device exists that satisfies the condition, the returned optional will +// hold no value. +StatusOr> GetDominantDevice( + absl::Span computations, double dominant_factor); + +// Returns the HloSharding with the tile dimensions and tile assignment +// transposed based on the specified dimension numbers. In case of a tile +// maximal sharding returns the original sharding. +HloSharding TransposeSharding(const HloSharding& sharding, + const std::vector& dimensions); + +// Returns the HloSharding with the tile shape reshaped based on the source and +// target shapes and the tile assignment adjusted to correspond to the new tile +// shape or absl::nullopt if the resulting reshape would create an invalid +// sharding (non continuous or non uniformly sized tiles). In case of a tile +// maximal sharding returns the original sharding. +absl::optional ReshapeSharding(const Shape& source_shape, + const Shape& target_shape, + const HloSharding& sharding); + +// Returns a sharding tiled on unique dimension dim by reshaping the tile +// assignment of the sharding argument. Only dimensions in the dims span +// argument are considered for reshaping, the others are ignored. +// Assumptions: sharding is tile sharded, and dim must be included in dims. +HloSharding ReshapeToTileDimension(const HloSharding& sharding, int64 dim, + absl::Span dims); + +// Returns true if the provided module includes one or more instructions with +// a tile sharding. +bool ContainsTileSharding(const HloModule& module); + +// Returns the preferred output sharding for a gather op based on the sharding +// of the indces. +HloSharding GatherOutputSharding(const HloSharding& index_sharding, + const HloInstruction* hlo); + +// Returns the preferred index sharding for a gather op based on the sharding +// of the output. 
+HloSharding GatherIndexSharding(const HloSharding& output_sharding, + const HloInstruction* hlo); + +// Returns a new HloSharding for a gather op so that only non offset dimensions +// are sharded. Assume "result" is returned by this function. It is ensured that +// "GetIndexSharding(result, hlo)" will have the same number of elements as +// "result". +HloSharding GatherEffectiveOutputSharding(const HloInstruction& hlo); + +// Returns the preferred index sharding for a scatter op based on the sharding +// of the data. +HloSharding ScatterIndexSharding(const HloSharding& data_sharding, + const HloInstruction* hlo); + +// Returns the preferred data sharding for a scatter op based on the sharding +// of the index. +HloSharding ScatterDataSharding(const HloSharding& index_sharding, + const HloInstruction* hlo); + +// Returns a new index sharding for a scatter op so that we only shard on first +// "number of scatter_window_dims" dimensions. Assume "result" is returned by +// this function. It is ensured that "ScatterDataSharding(result, hlo)" will +// have the same number of elements as "result". +HloSharding ScatterEffectiveIndexSharding(const HloSharding& index_sharding, + const HloInstruction& hlo); + +// Returns a new data sharding for a scatter op so that we only shard on +// scatter_window_dims. Assume "result" is returned by this function. It is +// ensured that "ScatterIndexSharding(result, hlo)" will have the same number of +// elements as "result". +HloSharding ScatterEffectiveDataSharding(const HloSharding& data_sharding, + const HloInstruction& hlo); + +// Returns an identity value and an HloOpcode for reduce computation of scatter +// instruction. +// - If computation is add/or, return 0/false with corresponding op code; +// - If computation is multiply/and, return 1/true with corresponding op code. +// - If computation is min/max, return max value/min value with corresponding op +// code. +// - Otherwise, return error status. +StatusOr, HloOpcode>> +IdentityValueAndHloOpcodeForScatterReduceComputation( + const HloScatterInstruction& scatter); + +// Given a sharding and a list of devices in the topology, return a +// list of the devices that `sharding` applies to. +std::vector DevicesForSharding( + const HloSharding& sharding, const std::vector& available_devices); + +} // namespace hlo_sharding_util +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_SHARDING_UTIL_H_ diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc new file mode 100644 index 00000000000..02496c75965 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc @@ -0,0 +1,206 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_sharding_util.h" + +#include "tensorflow/compiler/xla/test.h" + +namespace xla { +namespace hlo_sharding_util { +namespace { + +TEST(HloShardingUtilTest, TransposeShardingReplicated) { + EXPECT_EQ(TransposeSharding(HloSharding::Replicate(), {0, 1, 2}), + HloSharding::Replicate()); +} + +TEST(HloShardingUtilTest, TransposeShardingTiled) { + HloSharding input = HloSharding::Tile(Array4D({{{{0, 1}}, {{2, 3}}}})); + HloSharding output = + HloSharding::Tile(Array4D({{{{0}, {2}}}, {{{1}, {3}}}})); + EXPECT_EQ(TransposeSharding(input, {3, 0, 1, 2}), output); +} + +TEST(HloShardingUtilTest, ReshapeShardingMaximal) { + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 3, 5}); + Shape output_shape = ShapeUtil::MakeShape(F32, {3, 5, 2}); + HloSharding sharding = HloSharding::AssignDevice(7); + absl::optional result = + ReshapeSharding(input_shape, output_shape, sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingTiledInvalid) { + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 3, 5}); + Shape output_shape = ShapeUtil::MakeShape(F32, {3, 5, 2}); + HloSharding sharding = HloSharding::Tile(Array3D({{{0}, {1}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, sharding); + EXPECT_FALSE(result.has_value()); +} + +TEST(HloShardingUtilTest, ReshapeShardingTiledMerge) { + Shape input_shape = ShapeUtil::MakeShape(F32, {4, 5, 7}); + Shape output_shape = ShapeUtil::MakeShape(F32, {20, 7}); + HloSharding input_sharding = + HloSharding::Tile(Array3D({{{0}}, {{1}}})); + HloSharding output_sharding = HloSharding::Tile(Array2D({{0}, {1}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingTiledSplit) { + Shape input_shape = ShapeUtil::MakeShape(F32, {16, 7}); + Shape output_shape = ShapeUtil::MakeShape(F32, {4, 4, 7}); + HloSharding input_sharding = HloSharding::Tile(Array2D({{0}, {1}})); + HloSharding output_sharding = + HloSharding::Tile(Array3D({{{0}}, {{1}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingTiledSplitThenMerge) { + Shape input_shape = ShapeUtil::MakeShape(F32, {16, 4, 7}); + Shape output_shape = ShapeUtil::MakeShape(F32, {4, 16, 7}); + HloSharding input_sharding = + HloSharding::Tile(Array3D({{{0}}, {{1}}})); + HloSharding output_sharding = + HloSharding::Tile(Array3D({{{0}}, {{1}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingTiledArbitraryMinorDimensions) { + Shape input_shape = ShapeUtil::MakeShape(F32, {16, 7, 5, 3}); + Shape output_shape = ShapeUtil::MakeShape(F32, {4, 15, 2, 14}); + Array sharding_array({2, 1, 1, 1}); + sharding_array(0, 0, 0, 0) = 0; + sharding_array(1, 0, 0, 0) = 1; + HloSharding sharding = HloSharding::Tile(sharding_array); + absl::optional result = + ReshapeSharding(input_shape, output_shape, sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), sharding); +} + +TEST(HloShardingUtilTest, 
ReshapeShardingTiledTrivialDimensions) { + Shape input_shape = ShapeUtil::MakeShape(F32, {3, 1, 5, 7}); + Shape output_shape = ShapeUtil::MakeShape(F32, {3, 5, 1, 7}); + HloSharding input_sharding = + HloSharding::Tile(Array4D({{{{0}, {1}}}})); + HloSharding output_sharding = + HloSharding::Tile(Array4D({{{{0}}, {{1}}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingTrivialDImensionInsertedToEnd) { + Shape input_shape = ShapeUtil::MakeShape(F32, {8, 16}); + Shape output_shape = ShapeUtil::MakeShape(F32, {8, 16, 1}); + HloSharding input_sharding = HloSharding::Tile(Array2D({{0}, {1}})); + HloSharding output_sharding = + HloSharding::Tile(Array3D({{{0}}, {{1}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + +TEST(HloShardingUtilTest, NoopReshapeShardingEmptyTile) { + Shape shape = ShapeUtil::MakeShape(F32, {7, 1, 1}); + HloSharding sharding = HloSharding::Tile(Array3D({{{0}, {1}}})); + absl::optional result = ReshapeSharding(shape, shape, sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), sharding); +} + +TEST(HloShardingUtilTest, ReshapeShardingScalar) { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1}); + Shape output_shape = ShapeUtil::MakeShape(F32, {}); + HloSharding sharding = HloSharding::Tile(Array3D({{{0}, {1}}})); + absl::optional result = + ReshapeSharding(input_shape, output_shape, sharding); + EXPECT_FALSE(result.has_value()); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension2D_Dim0) { + HloSharding sharding = HloSharding::Tile(Array2D({{0, 1}, {2, 3}})); + HloSharding result = + ReshapeToTileDimension(sharding, /*dim=*/0, /*dims=*/{0, 1}); + EXPECT_EQ(result.tile_assignment(), Array2D({{0}, {1}, {2}, {3}})); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension2D_Dim1) { + HloSharding sharding = HloSharding::Tile(Array2D({{0, 1}, {2, 3}})); + HloSharding result = + ReshapeToTileDimension(sharding, /*dim=*/1, /*dims=*/{0, 1}); + EXPECT_EQ(result.tile_assignment(), Array2D({{0, 2, 1, 3}})); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension3D_Dim0) { + HloSharding sharding = + HloSharding::Tile(Array3D({{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}})); + HloSharding result = + ReshapeToTileDimension(sharding, /*dim=*/0, /*dims=*/{0, 1, 2}); + EXPECT_EQ( + result.tile_assignment(), + Array3D({{{0}}, {{1}}, {{2}}, {{3}}, {{4}}, {{5}}, {{6}}, {{7}}})); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension3D_Dim1) { + HloSharding sharding = + HloSharding::Tile(Array3D({{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}})); + HloSharding result = + ReshapeToTileDimension(sharding, /*dim=*/1, /*dims=*/{0, 1, 2}); + EXPECT_EQ(result.tile_assignment(), + Array3D({{{0}, {1}, {4}, {5}, {2}, {3}, {6}, {7}}})); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension3D_Dim2) { + HloSharding sharding = + HloSharding::Tile(Array3D({{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}})); + HloSharding result = + ReshapeToTileDimension(sharding, /*dim=*/2, /*dims=*/{0, 1, 2}); + EXPECT_EQ(result.tile_assignment(), + Array3D({{{0, 2, 4, 6, 1, 3, 5, 7}}})); +} + +TEST(HloShardingUtilTest, ReshapeToTileDimension2D_Dim2_Batch1) { + // Tile sharding in batch dimension, i.e. + // sharding={devices[2,2,2]0,1,2,3,4,5,6,7,8}. 
+ HloSharding sharding = + HloSharding::Tile(Array3D({{{0, 1}, {2, 3}}, {{4, 5}, {6, 7}}})); + // Reshape on dimensions {1, 2} only, therefore ignoring batch dimension 0. + HloSharding result = ReshapeToTileDimension(sharding, /*dim=*/2, + /*dims=*/{1, 2}); + // Expected result is {devices=[2,1,4]0,2,1,3,4,6,5,7}, i.e. the two + // non-batch dimensions {{0, 1}, {2, 3}} and {{4, 5}, {6, 7}} are individually + // reshaped to tile dimension 2, i.e. {{0, 2, 1, 3}}, {{4, 6, 5, 7}}. + EXPECT_EQ(result.tile_assignment(), + Array3D({{{0, 2, 1, 3}}, {{4, 6, 5, 7}}})); +} + +} // namespace +} // namespace hlo_sharding_util +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 0911af10f38..d15a36532eb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -236,6 +236,40 @@ static Status CheckReplicaGroups(HloInstruction* hlo) { return Status::OK(); } +Status ShapeVerifier::HandleAllGather(HloInstruction* hlo) { + auto ag = Cast(hlo); + TF_RETURN_IF_ERROR(CheckReplicaGroups(ag)); + TF_RET_CHECK(ag->all_gather_dimension() >= 0); + TF_RET_CHECK(ag->all_gather_dimension() < ag->shape().rank()); + TF_RET_CHECK(ag->all_gather_dimension() < ag->operand(0)->shape().rank()); + if (ag->use_global_device_ids() && ag->replica_groups().empty()) { + return InternalError( + "Replica group must be specified when use_global_device_ids is true"); + } + + int64 shard_count = CeilOfRatio( + ag->shape().dimensions(ag->all_gather_dimension()), + ag->operand(0)->shape().dimensions(ag->all_gather_dimension())); + if (ag->channel_id().has_value()) { + if (ag->use_global_device_ids()) { + TF_RET_CHECK(shard_count == ag->replica_groups()[0].replica_ids_size()); + } else { + if (ag->replica_groups().empty() || + ag->replica_groups()[0].replica_ids_size() != 1) { + return InternalError( + "Replica group size must be 1 when use_global_device_ids is " + "false if the all-gather is also cross-partition"); + } + } + } else if (!ag->replica_groups().empty()) { + // Cross-replica all-gather: shard count is subgroup size. + TF_RET_CHECK(shard_count == ag->replica_groups()[0].replica_ids_size()); + } + return CheckShape(ag, ShapeInference::InferAllGatherShape( + ag->operand(0)->shape(), ag->all_gather_dimension(), + shard_count)); +} + Status ShapeVerifier::HandleAllReduce(HloInstruction* crs) { TF_RETURN_IF_ERROR(CheckReplicaGroups(crs)); @@ -628,9 +662,11 @@ Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { shape_size_function_(bitcast->operand(0)->shape())) { return InternalError( "Bitcast cannot have different shape sizes of output (%d) and operand " - "(%d)", + "(%d) (%s) (%s)", shape_size_function_(bitcast->shape()), - shape_size_function_(bitcast->operand(0)->shape())); + shape_size_function_(bitcast->operand(0)->shape()), + bitcast->shape().ToString(true), + bitcast->operand(0)->shape().ToString(true)); } return Status::OK(); } @@ -697,11 +733,7 @@ Status ShapeVerifier::HandleFusion(HloInstruction* fusion) { } for (HloInstruction* fused_param : fused_parameters) { int64 param_no = fused_param->parameter_number(); - // Since fusion buffers aren't materialized, fusion parameters will not have - // the same memory space as the fusion operand. 
- if (!ShapesSame(fused_param->shape(), fusion->operand(param_no)->shape(), - /*minor_to_major_only=*/false, - /*ignore_memory_space=*/true)) { + if (!ShapesSame(fused_param->shape(), fusion->operand(param_no)->shape())) { return InternalError( "Shape mismatch between parameter number %d and its operand in " "%s.", diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 2e83361a591..7a2d3dc2e6c 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -56,6 +56,7 @@ class ShapeVerifier : public DfsHloVisitor { Status HandleFft(HloInstruction* fft) override; Status HandleCholesky(HloInstruction* hlo) override; Status HandleTriangularSolve(HloInstruction* hlo) override; + Status HandleAllGather(HloInstruction* hlo) override; Status HandleAllReduce(HloInstruction* crs) override; Status HandleAllToAll(HloInstruction* hlo) override; Status HandleCollectivePermute(HloInstruction* hlo) override; diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 53938a489f1..1bc3d24274c 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -145,6 +145,7 @@ bool IsAlwaysDuplicable(const HloInstruction& instruction) { case HloOpcode::kCholesky: case HloOpcode::kConditional: case HloOpcode::kConvolution: + case HloOpcode::kAllGather: case HloOpcode::kAllReduce: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: @@ -175,6 +176,7 @@ bool IsAlwaysDuplicable(const HloInstruction& instruction) { case HloOpcode::kSendDone: case HloOpcode::kSort: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: case HloOpcode::kTrace: case HloOpcode::kTriangularSolve: diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h index 3c35fda55f1..9e4bdeb2b2d 100644 --- a/tensorflow/compiler/xla/service/interpreter/executor.h +++ b/tensorflow/compiler/xla/service/interpreter/executor.h @@ -203,7 +203,8 @@ class XlaInterpreterExecutor : public internal::StreamExecutorInterface { std::unique_ptr GetStreamImplementation() override { - return std::unique_ptr(new host::HostStream()); + return std::unique_ptr( + new host::HostStream(/*thread_stack_size=*/0)); } std::unique_ptr GetTimerImplementation() override { diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 64390e77ddb..13699f3adf9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -951,7 +951,8 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { if (!Shape::Equal() .IgnoreDynamicDimension() .MinorToMajorOnlyInLayout()(instruction_subshape, - buffer->shape())) { + buffer->shape()) && + instruction->opcode() != HloOpcode::kBitcast) { return InternalError( "Layout of instruction %s at index {%s} does not match " "source LogicalBuffer %s: %s vs %s", @@ -1798,13 +1799,6 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { // potential bugs in the layout assignment pass that may accidentally use the // existing layout. 
for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kBitcast) { - // bitcasts are inherently layout sensitive and so a bitcast instruction - // present in the IR before layout assignment is a bug. - return InternalError( - "Unexpected bitcast operation seen during layout assignment: %s.", - instruction->ToString()); - } // Some instructions carry mandatory layouts in their shape. if (instruction->opcode() != HloOpcode::kInfeed && !IsLayoutConstrainedCustomCall(instruction) && @@ -2179,6 +2173,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kConditional: case HloOpcode::kConvert: case HloOpcode::kCos: + case HloOpcode::kAllGather: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: case HloOpcode::kDivide: @@ -2220,6 +2215,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kSlice: case HloOpcode::kSort: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kSubtract: case HloOpcode::kTanh: case HloOpcode::kPopulationCount: diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 304a80c7a52..6e575247e6b 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -814,27 +814,6 @@ TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); } -TEST_F(LayoutAssignmentTest, InternalErrorOnBitcast) { - auto builder = HloComputation::Builder(TestName()); - auto constant0 = builder.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); - builder.AddInstruction( - HloInstruction::CreateBitcast(constant0->shape(), constant0)); - auto m = CreateNewVerifiedModule(); - m->AddEntryComputation(builder.Build()); - - ComputationLayout computation_layout( - m->entry_computation()->ComputeProgramShape()); - LayoutAssignment layout_assignment(&computation_layout); - Status error_status = layout_assignment.Run(m.get()).status(); - EXPECT_FALSE(error_status.ok()); - EXPECT_THAT( - error_status.error_message(), - ::testing::HasSubstr( - "Unexpected bitcast operation seen during layout assignment")); -} - TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) { // Pin non matching layouts to parameter and root. 
const char* module_str = R"( diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index ef8ddfc1a76..c80646e0c70 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -112,6 +112,8 @@ ExecutionOptions CreateExecutionOptions( } execution_options.set_num_replicas(build_options.num_replicas()); execution_options.set_num_partitions(build_options.num_partitions()); + execution_options.set_use_spmd_partitioning( + build_options.use_spmd_partitioning()); if (build_options.has_device_assignment()) { TF_CHECK_OK(build_options.device_assignment().Serialize( execution_options.mutable_device_assignment())); diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index d5a118c00dc..742de71e74c 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -585,23 +585,35 @@ void AlternateMemoryBestFitHeap::AppendBufferInfoDebugString( // definition_time: int. Logical time this value was defined in the schedule. // use_times: string. This is a semicolon-separated list of integers for all // the use times. + // use_names: string. This is a semicolon-separated list of string + // representation of uses. if (debug_str->empty()) { // Append the column names. absl::StrAppend(debug_str, - "buffer_id,buffer_name,alt_mem_benefit,size,definition_" - "time,use_times\n"); + "buffer_id,buffer_name,alt_mem_benefit,size," + "definition_time,use_times,use_names\n"); } const HloBuffer& buffer = alias_analysis_.GetBufferContainingValue(*interval.buffer); const auto& instruction_schedule = hlo_live_range_.instruction_schedule(); int64 definition_time = instruction_schedule.at(interval.buffer->defining_position().instruction); - std::set use_times; + std::vector> uses; for (const HloValue* value : buffer.values()) { for (const HloUse& use : value->uses()) { - use_times.insert(instruction_schedule.at(use.instruction)); + uses.push_back( + {instruction_schedule.at(use.instruction), use.ToString()}); } } + absl::c_sort(uses); + std::vector use_times; + std::vector use_names; + use_times.reserve(uses.size()); + use_names.reserve(uses.size()); + for (auto use : uses) { + use_times.push_back(use.first); + use_names.push_back(use.second); + } absl::StrAppend(debug_str, buffer.id(), ","); absl::StrAppend(debug_str, "\"", interval.buffer->ToShortString(), "\","); @@ -612,7 +624,8 @@ void AlternateMemoryBestFitHeap::AppendBufferInfoDebugString( debug_str, alternate_memory_benefit ? 
*alternate_memory_benefit : 0, ","); absl::StrAppend(debug_str, interval.size, ","); absl::StrAppend(debug_str, definition_time, ","); - absl::StrAppend(debug_str, "\"", absl::StrJoin(use_times, ";"), "\""); + absl::StrAppend(debug_str, "\"", absl::StrJoin(use_times, ";"), "\","); + absl::StrAppend(debug_str, "\"", absl::StrJoin(use_names, ";"), "\""); absl::StrAppend(debug_str, "\n"); } @@ -1820,24 +1833,30 @@ MemorySpaceAssignment::Run(HloModule* module, MemorySpaceAssignment memory_space_assignment(module, options, hlo_live_range); - TF_RETURN_IF_ERROR(memory_space_assignment.FindAllocationSequence( - hlo_live_range, alias_analysis)); - TF_RETURN_IF_ERROR(memory_space_assignment.Process()); - memory_space_assignment.ScheduleAsynchronousCopies(); - TF_RETURN_IF_ERROR(memory_space_assignment.SimplifyGraph()); - TF_RETURN_IF_ERROR(memory_space_assignment.FixSchedule()); - TF_RETURN_IF_ERROR(memory_space_assignment.ExportAndColorBuffers()); + return memory_space_assignment.RunMemorySpaceAssignment(hlo_live_range, + alias_analysis); +} + +StatusOr> +MemorySpaceAssignment::RunMemorySpaceAssignment( + const HloLiveRange& hlo_live_range, + const HloAliasAnalysis& alias_analysis) { + TF_RETURN_IF_ERROR(FindAllocationSequence(hlo_live_range, alias_analysis)); + TF_RETURN_IF_ERROR(Process()); + ScheduleAsynchronousCopies(); + TF_RETURN_IF_ERROR(SimplifyGraph()); + TF_RETURN_IF_ERROR(FixSchedule()); + TF_RETURN_IF_ERROR(ExportAndColorBuffers()); VLOG(3) << "Module after memory space assignment: "; - XLA_VLOG_LINES(3, module->ToString()); - TF_CHECK_OK(module->schedule().Verify()); + XLA_VLOG_LINES(3, module_->ToString()); + TF_CHECK_OK(module_->schedule().Verify()); VLOG(1) << "Maximum number of outstanding async copies: " - << CountMaximumOutstandingAsyncCopies(*module); + << CountMaximumOutstandingAsyncCopies(*module_); - TF_RETURN_IF_ERROR( - memory_space_assignment.VerifyAndExportHeapSimulatorTrace()); + TF_RETURN_IF_ERROR(VerifyAndExportHeapSimulatorTrace()); - return std::move(memory_space_assignment.preset_assignments_); + return std::move(preset_assignments_); } Status MemorySpaceAssignment::FindAllocationSequence( diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index ab4bc5bf106..eb16db90600 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -604,6 +604,8 @@ class MemorySpaceAssignment { AllocationSequence allocation_sequence_; }; + virtual ~MemorySpaceAssignment() = default; + // Runs the MemorySpaceAssignment pass. static StatusOr> Run( HloModule* module, const HloLiveRange& hlo_live_range, @@ -621,13 +623,19 @@ class MemorySpaceAssignment { Status VerifyAndExportHeapSimulatorTrace(); protected: + // Main driver of the memory space assignment pass. + virtual StatusOr> RunMemorySpaceAssignment( + const HloLiveRange& hlo_live_range, + const HloAliasAnalysis& alias_analysis); + // Finds an AllocationSequence for placing buffers in alternate memory using // the AlternateMemoryBestFitHeap algorithm. Must be set before Process() is // called. 
- Status FindAllocationSequence(const HloLiveRange& hlo_live_range, - const HloAliasAnalysis& alias_analysis); + virtual Status FindAllocationSequence(const HloLiveRange& hlo_live_range, + const HloAliasAnalysis& alias_analysis); + + Options options() const { return options_; } - private: MemorySpaceAssignment(HloModule* module, Options options, const HloLiveRange& hlo_live_range) : module_(module), @@ -646,6 +654,9 @@ class MemorySpaceAssignment { } } + AllocationSequence allocations_; + + private: // Process calls Process methods of the allocations after the allocations have // been finalized. Status Process(); @@ -682,7 +693,6 @@ class MemorySpaceAssignment { Options options_; std::vector flattened_instructions_; absl::flat_hash_set computations_in_schedule_; - AllocationSequence allocations_; std::unique_ptr preset_assignments_; std::vector> alternate_memory_assignments_; int64 alternate_memory_size_ = 0; diff --git a/tensorflow/compiler/xla/service/memory_space_propagation.cc b/tensorflow/compiler/xla/service/memory_space_propagation.cc new file mode 100644 index 00000000000..80eb4017477 --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation.cc @@ -0,0 +1,67 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/memory_space_propagation.h" + +namespace xla { + +StatusOr MemorySpacePropagation::Run(HloModule* module) { + bool modified = false; + TF_ASSIGN_OR_RETURN(auto dataflow_analysis, + HloDataflowAnalysis::Run(*module)); + dataflow_analysis_ = std::move(dataflow_analysis); + + for (HloComputation* computation : module->MakeNonfusionComputations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kFusion) { + // Propagate the operand subshapes. + for (int operand_idx = 0; operand_idx < instruction->operand_count(); + ++operand_idx) { + modified |= + PropagateSubshapes(instruction->operand(operand_idx)->shape(), + instruction->fused_parameter(operand_idx)); + } + + // Propagate output subshapes. 
+ modified |= PropagateSubshapes(instruction->shape(), + instruction->fused_expression_root()); + } + } + } + return modified; +} + +bool MemorySpacePropagation::PropagateSubshapes( + const Shape& caller_shape, const HloInstruction* callee_instruction) const { + bool modified = false; + for (const ShapeUtil::IndexedShape& indexed_shape : + ShapeUtil::GetLeafShapes(caller_shape)) { + int64 memory_space = indexed_shape.shape.layout().memory_space(); + const HloValue& value = dataflow_analysis_->GetUniqueValueAt( + callee_instruction, indexed_shape.index); + + for (const HloPosition& position : value.positions()) { + Shape* shape = ShapeUtil::GetMutableSubshape( + position.instruction->mutable_shape(), position.index); + if (shape->layout().memory_space() != memory_space) { + shape->mutable_layout()->set_memory_space(memory_space); + modified = true; + } + } + } + return modified; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/memory_space_propagation.h b/tensorflow/compiler/xla/service/memory_space_propagation.h new file mode 100644 index 00000000000..65a1dfd14a6 --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation.h @@ -0,0 +1,46 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ + +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// This is a legalization pass that propagates the memory space in the layout to +// the fusion computations. +class MemorySpacePropagation : public HloModulePass { + public: + ~MemorySpacePropagation() override = default; + absl::string_view name() const override { return "memory-space-propagation"; } + StatusOr Run(HloModule* module) override; + + private: + // Given the caller shape (operand or output) and its corresponding + // insturction in the fused computation (parameter or root), propagates the + // memory space to all the subshapes in the callee side. Returns true if the + // module is modified. + bool PropagateSubshapes(const Shape& caller_shape, + const HloInstruction* callee_instruction) const; + + std::unique_ptr dataflow_analysis_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_PROPAGATION_H_ diff --git a/tensorflow/compiler/xla/service/memory_space_propagation_test.cc b/tensorflow/compiler/xla/service/memory_space_propagation_test.cc new file mode 100644 index 00000000000..8d74958f6aa --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_propagation_test.cc @@ -0,0 +1,203 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/memory_space_propagation.h" + +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +class MemorySpacePropagationTest : public HloTestBase { + public: + MemorySpacePropagationTest() + : HloTestBase(), + verifier_(/*layout_sensitive=*/false, /*allow_mixed_precision*/ false) { + } + + Status Verify(HloModule* module) { return verifier_.Run(module).status(); } + + private: + HloVerifier verifier_; +}; + +TEST_F(MemorySpacePropagationTest, NoMemorySpace) { + absl::string_view hlo_string = R"( + HloModule NoMemorySpace + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)} copy(%param2) + %fusion = s32[6]{0:T(128)} fusion(s32[6]{0:T(128)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_FALSE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_ASSERT_OK_AND_ASSIGN(auto ref, ParseAndReturnVerifiedModule(hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +TEST_F(MemorySpacePropagationTest, NonTupleOutput) { + absl::string_view hlo_string = R"( + HloModule NonTupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = 
s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = s32[6]{0:T(128)S(1)} fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + absl::string_view expected_hlo_string = R"( + HloModule NonTupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)S(1)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)S(1)} parameter(0) + ROOT %add.0 = s32[6]{0:T(128)S(1)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = s32[6]{0:T(128)S(1)} fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + ROOT %root = s32[6]{0:T(128)} copy(%fusion) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_TRUE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_EXPECT_OK(Verify(module.get())); + TF_ASSERT_OK_AND_ASSIGN(auto ref, + ParseAndReturnVerifiedModule(expected_hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +TEST_F(MemorySpacePropagationTest, TupleOutput) { + absl::string_view hlo_string = R"( + HloModule TupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)} parameter(0) + %add.0 = s32[6]{0:T(128)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + %multiply.0 = s32[6]{0:T(128)} multiply(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + ROOT %tuple = (s32[6]{0:T(128)}, s32[6]{0:T(128)}) tuple(%add.0, %multiply.0) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + %gte0 = s32[6]{0:T(128)S(1)} get-tuple-element(%fusion), index=0 + %gte1 = s32[6]{0:T(128)} get-tuple-element(%fusion), index=1 + ROOT %root = s32[6]{0:T(128)} add(%gte0, %gte1) + } + )"; + absl::string_view 
expected_hlo_string = R"( + HloModule TupleOutput + + %fused_computation { + %param_1.3 = s32[1]{0:T(128)} parameter(1) + %constant.2 = s32[]{:T(128)} constant(-2147483648) + %pad.2 = s32[6]{0:T(128)} pad(s32[1]{0:T(128)} %param_1.3, s32[]{:T(128)} %constant.2), padding=0_5 + %param_2.3 = s32[5]{0:T(128)S(1)} parameter(2) + %pad.3 = s32[6]{0:T(128)} pad(s32[5]{0:T(128)} %param_2.3, s32[]{:T(128)} %constant.2), padding=1_0 + %maximum.1 = s32[6]{0:T(128)} maximum(s32[6]{0:T(128)} %pad.2, s32[6]{0:T(128)} %pad.3) + %param_0.1 = s32[6]{0:T(128)S(1)} parameter(0) + %add.0 = s32[6]{0:T(128)S(1)} add(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + %multiply.0 = s32[6]{0:T(128)} multiply(s32[6]{0:T(128)} %maximum.1, s32[6]{0:T(128)} %param_0.1) + ROOT %tuple = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) tuple(%add.0, %multiply.0) + } + + ENTRY %entry { + %param0 = s32[6]{0:T(128)} parameter(0) + %param1 = s32[1]{0:T(128)} parameter(1) + %param2 = s32[5]{0:T(128)} parameter(2) + %arg0 = s32[6]{0:T(128)S(1)} copy(%param0) + %arg1 = s32[1]{0:T(128)} copy(%param1) + %arg2 = s32[5]{0:T(128)S(1)} copy(%param2) + %fusion = (s32[6]{0:T(128)S(1)}, s32[6]{0:T(128)}) fusion(s32[6]{0:T(128)S(1)} %arg0, s32[1]{0:T(128)} %arg1, s32[5]{0:T(128)S(1)} %arg2), kind=kLoop, calls=%fused_computation + %gte0 = s32[6]{0:T(128)S(1)} get-tuple-element(%fusion), index=0 + %gte1 = s32[6]{0:T(128)} get-tuple-element(%fusion), index=1 + ROOT %root = s32[6]{0:T(128)} add(%gte0, %gte1) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(hlo_string)); + MemorySpacePropagation memory_space_propagation; + EXPECT_TRUE(memory_space_propagation.Run(module.get()).ValueOrDie()); + TF_EXPECT_OK(Verify(module.get())); + TF_ASSERT_OK_AND_ASSIGN(auto ref, + ParseAndReturnVerifiedModule(expected_hlo_string)); + EXPECT_EQ(module->Hash(), ref->Hash()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index cd679f7412e..a57e4300d6e 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -185,11 +185,11 @@ cc_library( "@llvm-project//mlir:LinalgOps", "@llvm-project//mlir:LinalgToLLVM", "@llvm-project//mlir:LinalgTransforms", - "@llvm-project//mlir:LoopOps", - "@llvm-project//mlir:LoopOpsTransforms", "@llvm-project//mlir:LoopsToGPUPass", "@llvm-project//mlir:NVVMDialect", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:SCFTransforms", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:Transforms", diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 33d3690d4ab..847ad918308 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -31,9 +31,9 @@ limitations under the License. 
#include "mlir/Dialect/LLVMIR/NVVMDialect.h" // from @llvm-project #include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project #include "mlir/Dialect/Linalg/Passes.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/LoopOps.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/Passes.h" // from @llvm-project -#include "mlir/Dialect/LoopOps/Transforms.h" // from @llvm-project +#include "mlir/Dialect/SCF/Passes.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project +#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project @@ -45,6 +45,7 @@ limitations under the License. #include "mlir/IR/Region.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "mlir/Transforms/LoopUtils.h" // from @llvm-project #include "mlir/Transforms/Passes.h" // from @llvm-project @@ -60,34 +61,6 @@ namespace { using ::mlir::xla_lhlo::FusionOp; -// Following are some small transformations that are required to clean up code -// after lowering from linalg to loops. - -// A simple pass that applies lowering of HLO to LHLO only within LHLO ops that -// contain regions with HLO ops, e.g. FusionOp, ReduceOp, SelectAndScatterOp. -// This is needed, as these ops are not closed from above and hence nested pass -// managers can not be applied. -struct NestedHloRegionsConverter - : public mlir::PassWrapper { - void runOnFunction() override { - auto& ctx = getContext(); - mlir::OwningRewritePatternList patterns; - mlir::ConversionTarget target(ctx); - target.addLegalDialect<::mlir::xla_lhlo::XlaLhloDialect>(); - ::mlir::xla_hlo::populateHLOToLHLOConversionPattern(&ctx, &patterns); - - getFunction().walk([&](mlir::Operation* op) { - if (op->getNumRegions() == 0) { - return; - } - if (failed(applyPartialConversion(op, target, patterns, nullptr))) { - signalPassFailure(); - } - }); - } -}; - // Replaces a FusionOp by the operations contained in its region. struct FusionOpRemover : public mlir::PassWrapper { @@ -132,7 +105,7 @@ struct StoreForwardingPass // No store operation found. Continue search outside of the parallel // loop if block is in a parallel loop. 
if (auto parallelOp = - llvm::dyn_cast(block->getParentOp())) { + llvm::dyn_cast(block->getParentOp())) { return findStore(parallelOp.getOperation(), matches); } return {}; @@ -388,8 +361,8 @@ struct MapParallelLoops struct FuseInnerParallelLoops : public mlir::PassWrapper { void runOnFunction() override { - getFunction().walk([](mlir::loop::ParallelOp op) { - mlir::loop::naivelyFuseParallelOps(op.region()); + getFunction().walk([](mlir::scf::ParallelOp op) { + mlir::scf::naivelyFuseParallelOps(op.region()); }); } }; @@ -401,7 +374,7 @@ struct ParallelLoopCollapsingToFirstDim void runOnOperation() override { mlir::Operation* module = getOperation(); - module->walk([&](mlir::loop::ParallelOp op) { + module->walk([&](mlir::scf::ParallelOp op) { unsigned num_loops = op.getNumLoops(); std::vector combinedLoops; combinedLoops.reserve(num_loops); @@ -436,8 +409,10 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, tiling_for_unrolling.append(tile_sizes.begin(), tile_sizes.end()); } - // First, lower bodies of LHLO operations that contain HLO ops. - pm.addPass(absl::make_unique()); + // Legalize from HLO to LHLO. + pm.addPass(::mlir::xla_hlo::createLegalizeToLhloPass()); + // Moving `AllocOp`s and inserting missing `DeallocOp`s + pm.addPass(::mlir::createBufferPlacementPass()); // Next, we can strip the outer fusion operation. pm.addPass(absl::make_unique()); // Remove unnecessary LHLO copies. diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/tests/BUILD index 014b26c5c78..850d5f5a0cf 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/BUILD @@ -22,6 +22,7 @@ glob_lit_tests( default_tags = tf_cuda_tests_tags() + [ "no_pip", "config-cuda-only", + "no_rocm", ], driver = "@llvm-project//mlir:run_lit.sh", exclude = [ diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index ab71c30dcae..2ed5e709d81 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -313,6 +313,8 @@ StatusOr> Service::CreateModuleConfig( if (execution_options->num_partitions() > 0) { config->set_num_partitions(execution_options->num_partitions()); } + config->set_use_spmd_partitioning( + execution_options->use_spmd_partitioning()); config->set_seed(execution_options->seed()); config->set_launch_id(execution_options->launch_id()); config->set_debug_options(execution_options->debug_options()); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index d2cbdddff2e..8d6ef9faba9 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -257,6 +257,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case HloOpcode::kLog1p: case HloOpcode::kRsqrt: case HloOpcode::kSqrt: + case HloOpcode::kCbrt: case HloOpcode::kTanh: if (!ShapeUtil::ElementIsFloating(shape) && !ShapeUtil::ElementIsComplex(shape)) { @@ -1998,6 +1999,17 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, return a; } +/* static */ StatusOr ShapeInference::InferAllGatherShape( + const Shape& operand_shape, int64 all_gather_dimension, int64 shard_count) { + TF_RET_CHECK(all_gather_dimension > 0); + TF_RET_CHECK(all_gather_dimension < operand_shape.rank()); + TF_RET_CHECK(shard_count > 0); + auto shape = operand_shape; + shape.set_dimensions(all_gather_dimension, + shard_count * 
shape.dimensions(all_gather_dimension)); + return shape; +} + /* static */ StatusOr ShapeInference::InferAllReduceShape( absl::Span operand_shapes) { for (const Shape* operand_shape : operand_shapes) { diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 2e96a77aa22..2cb5930d098 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -123,6 +123,12 @@ class ShapeInference { // Infers the shape produced by the given triangular solve operation. static StatusOr InferCholeskyShape(const Shape& a); + // Infers the shape produced by an all-gather with the given operand shape, + // concat dimension, and shard count. + static StatusOr InferAllGatherShape(const Shape& operand_shape, + int64 all_gather_dimension, + int64 shard_count); + // Infers the shape produced by a cross replica sum with the given operand // shapes. static StatusOr InferAllReduceShape( diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index a1872330648..b7a67b4e66e 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -22,6 +22,7 @@ limitations under the License. #include "absl/types/span.h" #include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -93,6 +94,18 @@ class ShapedBuffer { buffers_.replace_shape_ptr(&on_device_shape_); } + // Reset the shape of this shaped buffer and underlying buffer structure. + // + // Precondition: EqualStructure(this->on_device_shape_, on_device_shape). + void set_shapes(const Shape& on_host_shape, const Shape& on_device_shape) { + CHECK(ShapeUtil::EqualStructure(on_device_shape, on_device_shape_)) + << "Structures are not the same. new: " << on_device_shape + << ", old: " << on_device_shape_; + on_host_shape_ = on_host_shape; + on_device_shape_ = on_device_shape; + buffers_.replace_shape_ptr(&on_device_shape_); + } + // Returns the underlying ShapeTree containing all the device addresses in the // ShapedBuffer. const ShapeTree& buffers() const { return buffers_; } diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD new file mode 100644 index 00000000000..5be6a04f934 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -0,0 +1,69 @@ +# Description: SPMD partitioning pass. 
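+# The package exposes the spmd_partitioner library (the partitioning pass and
+# its helper utilities) together with a unit test target.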
+ +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +package( + default_visibility = [":friends"], + licenses = ["notice"], # Apache 2.0 +) + +package_group( + name = "friends", + includes = [ + "//tensorflow/compiler/xla:friends", + ], +) + +cc_library( + name = "spmd_partitioner", + srcs = [ + "spmd_partitioner.cc", + "spmd_partitioner_util.cc", + ], + hdrs = [ + "spmd_partitioner.h", + "spmd_partitioner_util.h", + ], + deps = [ + "//tensorflow/compiler/xla:comparison_util", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:protobuf_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:window_util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client/lib:comparators", + "//tensorflow/compiler/xla/service:flatten_call_graph", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_casting_utils", + "//tensorflow/compiler/xla/service:hlo_cse", + "//tensorflow/compiler/xla/service:hlo_dce", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/compiler/xla/service:hlo_pass_pipeline", + "//tensorflow/compiler/xla/service:hlo_query", + "//tensorflow/compiler/xla/service:hlo_sharding_util", + "//tensorflow/compiler/xla/service:shape_inference", + "//tensorflow/compiler/xla/service:tuple_simplifier", + "//tensorflow/core/platform:numbers", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +tf_cc_test( + name = "spmd_partitioner_test", + srcs = ["spmd_partitioner_test.cc"], + deps = [ + ":spmd_partitioner", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service:hlo_pass_pipeline", + "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc new file mode 100644 index 00000000000..b857c8bdbe6 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -0,0 +1,4655 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" + +#include + +#include +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/client/lib/comparators.h" +#include "tensorflow/compiler/xla/comparison_util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/protobuf_util.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/flatten_call_graph.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_cse.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" +#include "tensorflow/compiler/xla/service/hlo_query.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_util.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" +#include "tensorflow/compiler/xla/service/tuple_simplifier.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/window_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/numbers.h" + +namespace xla { +namespace spmd { + +string SpmdLogger::MakeReport() { + string report; + absl::StrAppend(&report, + "\n\n***** SPMD memory during transformation *****\n"); + + std::sort(entries_.begin(), entries_.end(), + [](auto const& entry0, auto const& entry1) { + return entry0.first > entry1.first; + }); + for (int64 i = 0; + i < std::min(report_instruction_count_, entries_.size()); ++i) { + absl::StrAppend( + &report, "\n ", + tensorflow::strings::HumanReadableNumBytes(entries_[i].first), " : ", + entries_[i].second, "\n"); + } + + return report; +} + +void SpmdLogger::RegisterLogEntry(HloInstruction* hlo, + const std::vector& group) { + string report = hlo->ToString(); + int64 max_value = -1; + for (HloInstruction* inst : group) { + if (inst->shape().IsTuple()) { + continue; + } + max_value = + std::max(max_value, ShapeUtil::ByteSizeOf(inst->shape(), 4)); + absl::StrAppend(&report, " * ", inst->ToString(), "\n"); + } + entries_.push_back(std::make_pair(max_value, report)); +} + +/* static */ string SpmdLogger::ReportBeforePartition( + const HloModule& module, int64 report_instruction_count) { + string report; + absl::StrAppend(&report, + "\n\n***** SPMD memory usage before partition *****\n"); + absl::StrAppend(&report, "\n ** Replicated instructions\n"); + absl::StrAppend(&report, ReportMemoryUsage( + module, + [](const HloInstruction* hlo) { + return !hlo->has_sharding() || + hlo->sharding().IsReplicated(); + }, + report_instruction_count)); + absl::StrAppend(&report, "\n ** All instructions\n"); + absl::StrAppend(&report, + ReportMemoryUsage( + module, [](const HloInstruction* hlo) { return true; }, + report_instruction_count)); + return report; +} + +/* static */ string SpmdLogger::ReportAfterPartition( + 
const HloModule& module, int64 report_instruction_count) { + string report; + absl::StrAppend(&report, + "\n\n***** SPMD memory usage after partition *****\n"); + absl::StrAppend(&report, + ReportMemoryUsage( + module, [](const HloInstruction* hlo) { return true; }, + report_instruction_count)); + return report; +} + +template +/* static */ string SpmdLogger::ReportMemoryUsage( + const HloModule& module, const F& filter, int64 report_instruction_count) { + string report; + std::vector instructions; + instructions.reserve(module.instruction_count()); + + for (auto computation : module.computations()) { + if (computation->IsFusionComputation()) { + continue; + } + for (auto hlo : computation->instructions()) { + if (hlo->shape().IsTuple() || + ShapeUtil::IsEffectiveScalar(hlo->shape())) { + continue; + } + if (filter(hlo)) { + instructions.push_back(hlo); + } + } + } + + const auto add_report = [&](std::vector* insts) { + std::sort(insts->begin(), insts->end(), + [](const HloInstruction* inst0, const HloInstruction* inst1) { + return ShapeUtil::ByteSizeOf(inst0->shape()) > + ShapeUtil::ByteSizeOf(inst1->shape()); + }); + for (int64 i = 0; + i < std::min(report_instruction_count, insts->size()); ++i) { + absl::StrAppend(&report, " ", + tensorflow::strings::HumanReadableNumBytes( + ShapeUtil::ByteSizeOf((*insts)[i]->shape())), + " : ", (*insts)[i]->ToString(), "\n"); + } + }; + + add_report(&instructions); + return report; +} + +namespace { + +// Returns the replica group configuration where each replica belongs to its own +// group. +std::vector CreateReplicaGroups(int64 num_replicas) { + std::vector groups(num_replicas); + for (int64 i = 0; i < num_replicas; ++i) { + groups[i].add_replica_ids(i); + } + return groups; +} + +bool CanReshardWithAllToAll(const HloSharding& source, + const HloSharding& target) { + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) != UniqueTiledDim(target); +} + +bool CanReshardWithCollectivePermute(const HloSharding& source, + const HloSharding& target) { + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; +} + +// Clears all sharding attributes from instructions in the module. This must be +// called only after all SPMD transformation is complete. +Status ClearShardingAttributes(HloModule* module) { + for (HloComputation* computation : module->computations()) { + for (HloInstruction* hlo : computation->instructions()) { + // Keep sharding annotation on Infeed and entry parameters since they're + // used by HloReplicationAnalysis later (for ArCrsCombiner). 
+ if (hlo->opcode() == HloOpcode::kInfeed) { + continue; + } + if (hlo->opcode() == HloOpcode::kParameter && + computation == module->entry_computation()) { + continue; + } + hlo->clear_sharding(); + } + } + return Status::OK(); +} + +} // namespace + +HloInstruction* SpmdBuilder::AddInstruction( + std::unique_ptr instruction) { + HloInstruction* hlo = + HloComputation::Builder::AddInstruction(std::move(instruction)); + if (visiting_hlo_) { + instructions_[visiting_hlo_].push_back(hlo); + } + return hlo; +} + +PartitionedHlo PartitionedHlo::Reshard(const HloSharding& target) { + auto& cache = state_.reshard_cache->per_hlo_cache[hlo()].reshard_cache; + for (auto& entry : cache) { + if (entry.first == target) { + return entry.second; + } + } + cache.emplace_back(target, ReshardNoCache(target)); + state_.reshard_cache->per_hlo_cache[cache.back().second.hlo()] + .reshard_cache.emplace_back(sharding(), *this); + return cache.back().second; +} + +PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { + VLOG(2) << "Resharding " << hlo_->ToString() << " from " + << hlo_->sharding().ToString() << " to " << target.ToString(); + const Shape& shape = hlo_->shape(); + CHECK(shape.IsTuple() || !target.IsTuple()); + + // Tuple shape instructions may have non-tuple sharding, which means that the + // same sharding applies to all the leaves. + if (shape.IsTuple() && !target.IsTuple()) { + return Reshard(target.GetTupleSharding(shape).ValueOrDie()); + } + + // For a tuple shape, recursively apply Reshard to all the leaves and return + // a tuple instruction. + if (shape.IsTuple()) { + std::vector elements; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + auto subshape = ShapeUtil::GetTupleElementShape(shape, i); + auto element = state_.b->AddInstruction( + HloInstruction::CreateGetTupleElement(subshape, hlo(), i)); + element->set_sharding(sharding().GetSubSharding(shape, {i})); + elements.push_back( + PartitionedHlo( + element, ShapeUtil::GetTupleElementShape(base_shape_, i), state_) + .Reshard(target.GetSubSharding(shape, {i})) + .hlo()); + } + auto tuple = + state_.b->AddInstruction(HloInstruction::CreateTuple(elements)); + tuple->set_sharding(target); + return PartitionedHlo(tuple, base_shape_, state_); + } + + if (sharding() == target) { + return *this; + } + + if (shape.element_type() == TOKEN) { + return *this; + } + + if (CanReshardWithCollectivePermute(sharding(), target)) { + return ReshardWithCollectivePermute(target); + } + + if (CanReshardWithAllToAll(sharding(), target)) { + return ReshardWithAllToAll(target); + } + + // If not replicated yet, first replicate and then reshard to use one of the + // two implementations below. + if (!sharding().IsReplicated()) { + return Replicate().Reshard(target); + } + + // 'Replicated' to 'SingleDevice'. + if (target.IsTileMaximal()) { + auto copy = state_.b->AddInstruction( + HloInstruction::CreateUnary(hlo_->shape(), HloOpcode::kCopy, hlo_)); + copy->set_sharding(target); + return PartitionedHlo(copy, base_shape_, state_); + } + + // 'Replicated' to 'Tiled'. 
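+ // Each partition dynamic-slices its own shard out of the (possibly padded)
+ // replicated value at offsets derived from its partition id. For example,
+ // an s32[6] array tiled across 2 partitions has a [3] shard shape, and
+ // partition 1 starts its slice at offset 3.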
+ auto padded_hlo = + PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); + auto shard_shape = MakePartitionedShape(shape, target); + auto slice = state_.b->AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, padded_hlo, + MakePartitionOffsets(shape, target, state_.partition_id, state_.b), + shard_shape.dimensions())); + slice->set_sharding(target); + return PartitionedHlo(slice, base_shape_, state_); +} + +PartitionedHlo PartitionedHlo::PadWithValue(HloInstruction* pad_value) const { + const HloSharding& sharding = hlo_->sharding(); + const Shape& shape = hlo_->shape(); + CHECK(!shape.IsTuple() && shape.element_type() != TOKEN); + if (sharding.IsReplicated() || EvenlyPartitions(base_shape_, sharding)) { + return *this; + } + CHECK(!sharding.IsTileMaximal()); + auto index_shape = ShapeUtil::ChangeElementType(shape, S32); + auto mask_shape = ShapeUtil::ChangeElementType(index_shape, PRED); + auto get_mask_for_dim = [&](int64 dim, HloInstruction* start_index) { + // Comparison: iota + start_index < valid_size + auto iota = + state_.b->AddInstruction(HloInstruction::CreateIota(index_shape, dim)); + auto broadcast_start_index = state_.b->AddInstruction( + HloInstruction::CreateBroadcast(index_shape, start_index, {})); + auto index_in_full_shape = + state_.b->AddInstruction(HloInstruction::CreateBinary( + index_shape, HloOpcode::kAdd, iota, broadcast_start_index)); + auto valid_size = state_.b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(base_shape_.dimensions(dim)))); + auto broadcast_valid_size = state_.b->AddInstruction( + HloInstruction::CreateBroadcast(index_shape, valid_size, {})); + return state_.b->AddInstruction(HloInstruction::CreateCompare( + mask_shape, index_in_full_shape, broadcast_valid_size, + ComparisonDirection::kLt)); + }; + + HloInstruction* mask = nullptr; + auto offsets = MakePartitionOffsets(base_shape_, sharding, + state_.partition_id, state_.b); + for (int64 i = 0; i < shape.rank(); ++i) { + if (base_shape_.dimensions(i) % sharding.tile_assignment().dim(i) == 0) { + continue; + } + if (mask == nullptr) { + mask = get_mask_for_dim(i, offsets[i]); + } else { + mask = state_.b->AddInstruction( + HloInstruction::CreateBinary(mask->shape(), HloOpcode::kAnd, mask, + get_mask_for_dim(i, offsets[i]))); + } + } + + if (mask == nullptr) { + return *this; + } + + auto broadcast_pad_value = state_.b->AddInstruction( + HloInstruction::CreateBroadcast(shape, pad_value, {})); + auto result = state_.b->AddInstruction(HloInstruction::CreateTernary( + shape, HloOpcode::kSelect, mask, hlo_, broadcast_pad_value)); + result->set_sharding(sharding); + return PartitionedHlo(result, base_shape_, state_); +} + +absl::optional +PartitionedHlo::ReshardAsWindowedInput(const Window& window, + const HloSharding& target, + HloInstruction* pad_value, + bool mask_invalid_region) { + auto& cache = state_.reshard_cache->per_hlo_cache[hlo()].window_reshard_cache; + for (auto& entry : cache) { + if (std::get<0>(entry) == target && + protobuf_util::ProtobufEquals(std::get<1>(entry), window)) { + return std::get<2>(entry); + } + } + auto update_cache = [&](WindowedInputShardReturnValue result) { + cache.emplace_back(target, window, std::move(result)); + return std::get<2>(cache.back()); + }; + VLOG(2) << "ReshardAsWindowedInput()\n" + << "\twindow:" << window_util::ToString(window) + << "\ttarget sharding:" << target.ToString(); + + CHECK(!target.IsTileMaximal()); + auto partition_ordinals = + MakeTiledPartitionOrdinals(target, state_.partition_id, 
state_.b); + auto shard_shape = base_shape_; + + std::vector start_on_padded_calculations( + base_shape_.rank()); + std::vector limit_on_padded_calculations( + base_shape_.rank()); + std::vector dynamic_slice_offset_on_output( + base_shape_.rank(), nullptr); + + Window shard_window = window; + auto padded_shape = base_shape_; + std::vector offsets_on_padded_shape(base_shape_.rank()); + std::vector per_shard_window_counts(base_shape_.rank()); + std::vector explicit_left_padding(base_shape_.rank()); + for (int64 i = 0; i < base_shape_.rank(); ++i) { + // Do not pad non-partitioned dimensions. + int64 shard_count = target.tile_assignment().dim(i); + if (shard_count == 1) { + offsets_on_padded_shape[i] = state_.b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + continue; + } + const auto& wd = window.dimensions(i); + if (wd.window_dilation() != 1) { + // TODO(yuanzx): Support window dilation. + VLOG(2) << "Failed to reshard window operand due to window dilation"; + return absl::nullopt; + } + int64 full_size = + base_shape_.dimensions(i) + + (wd.base_dilation() - 1) * (base_shape_.dimensions(i) - 1) + + wd.padding_high() + wd.padding_low(); + if (full_size < wd.size()) { + VLOG(2) << "Failed to reshard window operand because the window size is " + "larger than padded base size"; + return absl::nullopt; + } + int64 window_count = (full_size - wd.size()) / wd.stride() + 1; + per_shard_window_counts[i] = CeilOfRatio(window_count, shard_count); + if (wd.stride() != 1 && + (wd.stride() * per_shard_window_counts[i]) % wd.base_dilation() != 0) { + // TODO(yuanzx): Support this case. + VLOG(2) << "Failed to reshard window operand due to non-trivial dilation"; + return absl::nullopt; + } + + // We use explicit padding for full dilations, then use padding_low and + // padding_high on the sharded op for the remaining. padding_low and + // padding_high are now given initial values, which will be later updated if + // dilation is not 1. + auto swd = shard_window.mutable_dimensions(i); + explicit_left_padding[i] = wd.padding_low() / wd.base_dilation(); + swd->set_padding_low(wd.padding_low() % wd.base_dilation()); + swd->set_padding_high(0); + + // Calculation for the first element needed on the 'padded-but-not-dilated' + // shape. The start on the dilated shape could be a hole, so we add + // wd.base_dilation() - 1 to the constant term to skip the leading holes. + start_on_padded_calculations[i] = MultiplyAddDivideOffsetCalculation( + wd.stride() * per_shard_window_counts[i], + wd.base_dilation() - 1 - swd->padding_low(), wd.base_dilation()); + int64 dilated_shard_size = + wd.stride() * (per_shard_window_counts[i] - 1) + wd.size(); + limit_on_padded_calculations[i] = MultiplyAddDivideOffsetCalculation( + wd.stride() * per_shard_window_counts[i], + dilated_shard_size + wd.base_dilation() - 1 - swd->padding_low(), + wd.base_dilation()); + + offsets_on_padded_shape[i] = start_on_padded_calculations[i].Calculate( + partition_ordinals[i], state_.b); + + auto shard_size_function = + limit_on_padded_calculations[i] - start_on_padded_calculations[i]; + int64 max_shard_size = shard_size_function.MaxInRange(0, shard_count); + shard_shape.set_dimensions(i, max_shard_size); + padded_shape.set_dimensions( + i, limit_on_padded_calculations[i].Calculate(shard_count - 1)); + + // For base dilation, calculate the needed padding_low and padding_high, as + // well as the offset for the output if a dynamic slice is needed after the + // sharded op. 
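+ // padding_high is grown until every shard covers the limit required by its
+ // windows on the dilated shard; for stride 1, padding_low is set to the
+ // maximum first-valid-element offset across shards, and a dynamic-slice
+ // offset on the output is emitted when shards disagree on that offset.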
+ if (wd.base_dilation() != 1) { + // Returns the offset of a shard's first valid element in the dilated + // shard. + auto get_first_valid_element_offset_on_dilated_shard = + [&](int64 shard_ordinal) { + return start_on_padded_calculations[i].Calculate(shard_ordinal) * + wd.base_dilation() + + swd->padding_low() - + wd.stride() * per_shard_window_counts[i] * shard_ordinal; + }; + CHECK_EQ(get_first_valid_element_offset_on_dilated_shard(0), + swd->padding_low()); + + // Determine swd->padding_high. + for (int64 shard_ordinal = 0; shard_ordinal < shard_count; + ++shard_ordinal) { + int64 wanted_limit_on_dilated_shard = + wd.stride() * (per_shard_window_counts[i] - 1) + wd.size(); + int64 actual_limit_on_dilated_shard_without_pad_high = + get_first_valid_element_offset_on_dilated_shard(shard_ordinal) + + (max_shard_size - 1) * wd.base_dilation() + 1; + swd->set_padding_high(std::max( + swd->padding_high(), + wanted_limit_on_dilated_shard - + actual_limit_on_dilated_shard_without_pad_high)); + } + + // Determine swd->padding_low and output dynamic slice index. + if (wd.stride() == 1) { + int64 max_pad_low = get_first_valid_element_offset_on_dilated_shard(0); + bool all_same = true; + for (int64 shard_ordinal = 1; shard_ordinal < shard_count; + ++shard_ordinal) { + int64 start = + get_first_valid_element_offset_on_dilated_shard(shard_ordinal); + if (start != swd->padding_low()) { + all_same = false; + } + max_pad_low = std::max(max_pad_low, start); + } + if (!all_same) { + auto start_on_padded_input = + start_on_padded_calculations[i].Calculate(partition_ordinals[i], + state_.b); + // We will calculate + // max_pad_low - (first_window - required_first_window) + // which equals + // required_first_window - (first_window - max_pad_low) + auto first_window_minus_max_pad_low = + MultiplyAddDivideOffsetCalculation( + wd.base_dilation(), swd->padding_low() - max_pad_low, 1) + .Calculate(start_on_padded_input, state_.b); + auto required_first_window = + MultiplyAddDivideOffsetCalculation(per_shard_window_counts[i], 0, + 1) + .Calculate(partition_ordinals[i], state_.b); + dynamic_slice_offset_on_output[i] = + state_.b->AddInstruction(HloInstruction::CreateBinary( + required_first_window->shape(), HloOpcode::kSubtract, + required_first_window, first_window_minus_max_pad_low)); + } + swd->set_padding_low(max_pad_low); + } else { + CHECK_EQ( + (wd.stride() * per_shard_window_counts[i]) % wd.base_dilation(), 0) + << "General base dilation not yet implemented."; + // padding_low on all shards should equal the initially assigned + // swd->padding_low(), i.e., the padding_low() on the original window. + } + } + } + + // Returns the output dynamic slice offset when needed, and absl::nullopt + // otherwise. + auto get_dynamic_slice_offset_on_output_if_needed = + [&]() -> absl::optional> { + if (absl::c_all_of( + dynamic_slice_offset_on_output, + [](HloInstruction* offset) { return offset == nullptr; })) { + return absl::nullopt; + } + auto zero = state_.b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + for (int64 i = 0; i < dynamic_slice_offset_on_output.size(); ++i) { + if (dynamic_slice_offset_on_output[i] == nullptr) { + dynamic_slice_offset_on_output[i] = zero; + } + } + return dynamic_slice_offset_on_output; + }; + + // If the currrent HLO is replicated, pad then slice. 
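+ // The pad uses explicit_left_padding and the padded shape computed above as
+ // edge padding; each partition then dynamic-slices its windowed shard at
+ // offsets_on_padded_shape, so no halo exchange is needed on this path.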
+ if (sharding().IsReplicated()) { + PaddingConfig padding_config; + for (int64 i = 0; i < base_shape_.rank(); ++i) { + auto padding_config_dim = padding_config.add_dimensions(); + padding_config_dim->set_interior_padding(0); + // Do not pad non-partitioned dimensions. + if (target.tile_assignment().dim(i) == 1) { + padding_config_dim->set_edge_padding_low(0); + padding_config_dim->set_edge_padding_high(0); + continue; + } + padding_config_dim->set_edge_padding_low(explicit_left_padding[i]); + padding_config_dim->set_edge_padding_high(padded_shape.dimensions(i) - + explicit_left_padding[i] - + base_shape_.dimensions(i)); + } + auto padded_hlo = ShapeUtil::Compatible(padded_shape, base_shape_) + ? hlo_ + : state_.b->AddInstruction(HloInstruction::CreatePad( + padded_shape, hlo_, pad_value, padding_config)); + auto sharded_input = + state_.b->AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, padded_hlo, offsets_on_padded_shape, + shard_shape.dimensions())); + return update_cache(WindowedInputShardReturnValue{ + sharded_input, shard_window, + get_dynamic_slice_offset_on_output_if_needed()}); + } + + if (target != sharding()) { + return Replicate().ReshardAsWindowedInput(window, target, pad_value); + } + + // Halo exchange. + HloInstruction* visiting_hlo = hlo_; + auto original_shard_shape = MakePartitionedShape(base_shape_, target); + + std::vector left_halo_size_functions(base_shape_.rank()); + std::vector right_halo_size_functions(base_shape_.rank()); + // TODO(yuanzx): We are concatenating on each sharded dimension one at time, + // and in the second dimension (and beyond) we create halos by slicing the + // concat in the previous dimension, which is not optimal. We should generate + // halos only concating slices, instead of slicing concats. + for (int dim = 0; dim < base_shape_.rank(); ++dim) { + int64 shard_count = target.tile_assignment().dim(dim); + if (shard_count == 1) { + continue; + } + int64 input_shard_size = + CeilOfRatio(base_shape_.dimensions(dim), shard_count); + + // Left halo. The size of the halo is derived by subtracting the first read + // element offset of the i'th partition from the limit of the (i-1)'th + // partition. + MultiplyAddDivideOffsetCalculation shard_limit_of_previous_on_padded( + input_shard_size, explicit_left_padding[dim], 1); + left_halo_size_functions[dim] = + shard_limit_of_previous_on_padded - start_on_padded_calculations[dim]; + + // Right halo. 
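+ // The size of the right halo is derived by subtracting the start offset of
+ // the next partition's data on the padded shape from the limit needed by
+ // this partition's windows.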
+ MultiplyAddDivideOffsetCalculation shard_start_of_next_on_padded( + input_shard_size, input_shard_size + explicit_left_padding[dim], 1); + right_halo_size_functions[dim] = + limit_on_padded_calculations[dim] - shard_start_of_next_on_padded; + + auto resharded = ExchangeHaloAndGetValidData( + visiting_hlo, base_shape_, left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding[dim], + padded_shape.dimensions(dim), shard_shape.dimensions(dim), dim, target, + offsets_on_padded_shape[dim], pad_value, partition_ordinals[dim], + state_.collective_ops_creator, state_.next_channel_id, state_.b, + mask_invalid_region); + if (!resharded) { + VLOG(1) << "ReshardAsWindowedInput failed without replicate first: halo " + "is beyond the neighbor."; + return Replicate().ReshardAsWindowedInput(window, target, pad_value); + } + visiting_hlo = *resharded; + } + return update_cache(WindowedInputShardReturnValue{ + visiting_hlo, shard_window, + get_dynamic_slice_offset_on_output_if_needed()}); +} + +PartitionedHlo PartitionedHlo::Replicate() { + const HloSharding& sharding = hlo_->sharding(); + const Shape& shape = hlo_->shape(); + CHECK(!shape.IsTuple() && shape.element_type() != TOKEN); + + if (sharding.IsReplicated()) { + return *this; + } + auto& cache = state_.reshard_cache->per_hlo_cache[hlo()].reshard_cache; + for (auto& entry : cache) { + if (entry.first.IsReplicated()) { + return entry.second; + } + } + auto update_cache = [&](PartitionedHlo resharded) { + state_.reshard_cache->per_hlo_cache[resharded.hlo()] + .reshard_cache.emplace_back(sharding, *this); + cache.emplace_back(HloSharding::Replicate(), std::move(resharded)); + return cache.back().second; + }; + // 'Single Device' to 'Repliated'. + if (sharding.IsTileMaximal()) { + return update_cache(Broadcast()); + } + + // 'Tiled' to 'Replicated'. 
+ Shape padded_base_shape = shape; + for (int64 i = 0; i < padded_base_shape.rank(); ++i) { + padded_base_shape.set_dimensions( + i, shape.dimensions(i) * sharding.tile_assignment().dim(i)); + } + auto zero = state_.b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(shape.element_type()))); + auto zero_bcast = state_.b->AddInstruction( + HloInstruction::CreateBroadcast(padded_base_shape, zero, {})); + auto dus = state_.b->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + padded_base_shape, zero_bcast, hlo_, + MakePartitionOffsets(padded_base_shape, sharding, state_.partition_id, + state_.b))); + HloComputation* reduction = + MakeBinaryAdd(shape.element_type(), state_.module); + + auto all_reduce = + state_.collective_ops_creator.create_cross_partition_all_reduce( + state_.b, dus, reduction, NewChannel()); + HloInstruction* result = all_reduce; + if (!ShapeUtil::Compatible(base_shape_, padded_base_shape)) { + std::vector start_indices(shape.rank(), 0); + std::vector strides(shape.rank(), 1); + result = state_.b->AddInstruction(HloInstruction::CreateSlice( + base_shape_, result, start_indices, base_shape_.dimensions(), strides)); + } + result->set_sharding(HloSharding::Replicate()); + return update_cache(PartitionedHlo(result, base_shape_, state_)); +} + +PartitionedHlo PartitionedHlo::Broadcast() const { + const Shape& shape = hlo_->shape(); + const HloSharding& sharding = hlo_->sharding(); + CHECK(sharding.HasUniqueDevice()); + CHECK(!shape.IsTuple() && shape.element_type() != TOKEN); + + auto src_core_id = state_.b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(sharding.GetUniqueDevice()))); + Shape bcast_shape = ShapeUtil::ChangeElementType(shape, PRED); + auto is_src_core = state_.b->AddInstruction(HloInstruction::CreateBroadcast( + bcast_shape, + state_.b->AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), state_.partition_id, src_core_id, + ComparisonDirection::kEq)), + {})); + + auto zero = state_.b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(shape.element_type()))); + auto zero_bcast = state_.b->AddInstruction( + HloInstruction::CreateBroadcast(shape, zero, {})); + auto operand = state_.b->AddInstruction(HloInstruction::CreateTernary( + shape, HloOpcode::kSelect, is_src_core, hlo(), zero_bcast)); + HloComputation* reduction = + MakeBinaryAdd(shape.element_type(), state_.module); + + auto result = state_.collective_ops_creator.create_cross_partition_all_reduce( + state_.b, operand, reduction, NewChannel()); + result->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(result, base_shape_, state_); +} + +PartitionedHlo PartitionedHlo::ReshardWithAllToAll( + const HloSharding& target) const { + int64 partition_count = sharding().tile_assignment().num_elements(); + absl::optional input_partition_dim = UniqueTiledDim(sharding()); + absl::optional output_partition_dim = UniqueTiledDim(target); + CHECK(input_partition_dim.has_value()); + CHECK(output_partition_dim.has_value()); + + // If the device order is different in the target, fix the order with + // ReshardWithCollectivePermute. 
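+ // This first reshards to a sharding with the source tiling but the target's
+ // device order, then recurses with the devices already in their final
+ // positions.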
+ auto input_tile_fixed_device_order = target.tile_assignment(); + input_tile_fixed_device_order.Reshape( + sharding().tile_assignment().dimensions()); + auto input_sharding_fixed_device_order = + HloSharding::Tile(input_tile_fixed_device_order); + if (input_sharding_fixed_device_order != sharding()) { + auto fixed_order = + ReshardWithCollectivePermute(input_sharding_fixed_device_order); + return fixed_order.ReshardWithAllToAll(target); + } + + auto padded_hlo = + PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); + + // The order of ids in the group must follow the target sharding. + std::vector groups(1); + for (int64 device : target.tile_assignment()) { + groups[0].add_replica_ids(device); + } + + HloInstruction* result = nullptr; + + // Split along the split dimension (output_partition_dim) of the all-to-all + // output. + std::vector dimensions; + for (int64 i = 0; i < base_shape_.rank(); ++i) { + if (i == *output_partition_dim) { + dimensions.push_back(partition_count); + dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); + } else { + dimensions.push_back(padded_hlo->shape().dimensions(i)); + } + } + auto reshape = state_.b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(base_shape_.element_type(), dimensions), + padded_hlo)); + // After the reshape, it is guaranteed to have at least 3 dimensions. + auto all_to_all = + state_.collective_ops_creator.create_cross_partition_all_to_all( + state_.b, {reshape}, groups, (*state_.next_channel_id)++, + output_partition_dim); + + // Reorder the split dimension of the reshape to be located in front of the + // input partition dimension, so the two dimensions can be combined. + int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) + ? *input_partition_dim + 1 + : *input_partition_dim; + std::vector permutation; + for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { + if (i == *output_partition_dim) { + continue; + } + if (i == new_input_partition_dim) { + permutation.push_back(*output_partition_dim); + } + permutation.push_back(i); + } + auto transpose = state_.b->AddInstruction(HloInstruction::CreateTranspose( + ShapeInference::InferTransposeShape(all_to_all->shape(), permutation) + .ValueOrDie(), + all_to_all, permutation)); + + // Combine the split dimension and the input partition dimension. 
+ auto new_shape = ShapeInference::InferAllToAllShape( + padded_hlo->shape(), *output_partition_dim, + *input_partition_dim, partition_count) + .ValueOrDie(); + result = state_.b->AddInstruction( + HloInstruction::CreateReshape(new_shape, transpose)); + + const Shape result_shape = MakePartitionedShape(base_shape_, target); + if (result_shape != result->shape()) { + result = state_.b->AddInstruction(HloInstruction::CreateSlice( + result_shape, result, std::vector(result_shape.rank(), 0), + result_shape.dimensions(), std::vector(result_shape.rank(), 1))); + } + result->set_sharding(target); + return PartitionedHlo(result, base_shape_, state_); +} + +PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( + const HloSharding& target) const { + CHECK(CanReshardWithCollectivePermute(sharding(), target)); + std::vector> src_dst_pairs; + sharding().tile_assignment().Each( + [&](absl::Span indices, int64 src_device) { + int64 dst_device = target.tile_assignment()(indices); + if (dst_device != src_device) { + src_dst_pairs.emplace_back(src_device, dst_device); + } + }); + auto cp = + state_.collective_ops_creator.create_cross_partition_collective_permute( + state_.b, hlo(), src_dst_pairs, (*state_.next_channel_id)++); + cp->set_sharding(target); + return PartitionedHlo(cp, base_shape_, state_); +} + +SpmdPartitioningVisitor::SpmdPartitioningVisitor( + HloComputation* computation, int64 num_partitions, int64 num_replicas, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdLogger* logger, SpmdPartitionerOptions options, + SpmdPartitioner* partitioner) + : changed_(false), + module_(computation->parent()), + num_partitions_(num_partitions), + num_replicas_(num_replicas), + collective_ops_creator_(collective_ops_creator), + next_channel_id_(next_channel_id), + b_(SpmdBuilder(computation->name() + "_spmd", /*hlo=*/nullptr)), + partition_id_(collective_ops_creator_.create_partition_id(&b_)), + logger_(logger), + options_(std::move(options)), + partitioner_(partitioner) {} + +Status SpmdPartitioningVisitor::DefaultAction(HloInstruction* hlo) { + if (hlo->HasSideEffect()) { + return Unimplemented("Side-effect ops cannot be replicated: %s", + hlo->ToString()); + } + + if (hlo->IsElementwise() && hlo->operand_count() > 0) { + return HandleElementwise(hlo); + } + + if (!hlo->sharding().IsTileMaximal()) { + VLOG(1) << "Not partitioned in SPMD mode (DefaultAction):" + << hlo->ToString(); + for (int64 i = 0; i < hlo->operand_count(); ++i) { + VLOG(1) << " operand " << i + << " sharding:" << hlo->operand(i)->sharding().ToString(); + } + } + + // If the instruction cannot be partitioned, replicate the instruction unless + // the instruction has side-effect. 
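+ // Reshard every operand to replicated, clone the instruction on the full
+ // shape, and then reshard the replicated result back to the instruction's
+ // assigned sharding.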
+ std::vector new_operands; + for (HloInstruction* operand : hlo->operands()) { + new_operands.push_back( + GetPartitionedHlo(operand).Reshard(HloSharding::Replicate()).hlo()); + } + auto clone = + b_.AddInstruction(hlo->CloneWithNewOperands(hlo->shape(), new_operands)); + clone->set_sharding(HloSharding::Replicate()); + clone->set_metadata(hlo->metadata()); + SetPartitionedHlo(hlo, + PartitionedHlo(clone, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding())); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::Preprocess(HloInstruction* hlo) { + visiting_hlo_ = hlo; + b_.set_visiting_hlo(hlo); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::Postprocess(HloInstruction* hlo) { + logger_->RegisterLogEntry(GetPartitionedHlo(hlo).hlo(), + b_.derived_instructions(hlo)); + visiting_hlo_ = nullptr; + b_.set_visiting_hlo(nullptr); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleElementwise(HloInstruction* hlo) { + std::vector new_operands; + for (HloInstruction* operand : hlo->operands()) { + new_operands.push_back( + GetPartitionedHlo(operand).Reshard(hlo->sharding()).hlo()); + } + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(hlo->CloneWithNewOperands( + MakePartitionedShape(hlo->shape(), hlo->sharding()), new_operands)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + + const Shape shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + const int64 dimension = hlo->concatenate_dimension(); + if (sharding.tile_assignment().dim(dimension) == 1) { + std::vector new_operands; + for (HloInstruction* operand : hlo->operands()) { + new_operands.push_back( + GetPartitionedHlo(operand).Reshard(sharding).hlo()); + } + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction( + hlo->CloneWithNewOperands(shard_shape, new_operands)); + }); + return Status::OK(); + } + + // If the concatenate dimension is along one of the partitioned dimensions, + // allocate the full output shape, each partition updates its owned region, + // all-reduce across partitions, and then slice its output region. + + // We currently don't support subgroup all-reduce along partitions, so more + // than 1 partitioned dimensions is not supported. + if (sharding.tile_assignment().dim(dimension) != num_partitions_) { + return DefaultAction(hlo); + } + + // temp_output_shape is the output shape where the concatenate dimension + // is changed to the full (and padded to shard count) dimension size. + auto temp_output_shape = MakePartitionedShape(hlo->shape(), sharding); + temp_output_shape.set_dimensions( + dimension, temp_output_shape.dimensions(dimension) * + sharding.tile_assignment().dim(dimension)); + auto temp_output = CreateZero(temp_output_shape, &b_); + + // Offset of each operand along the concatenate dimension. 
+  int64 offset = 0;
+  for (HloInstruction* operand : hlo->operands()) {
+    auto spmd_operand = GetPartitionedHlo(operand).Reshard(sharding).hlo();
+    std::vector<HloInstruction*> start_indices(
+        hlo->shape().rank(), b_.AddInstruction(HloInstruction::CreateConstant(
+                                 LiteralUtil::Zero(S32))));
+    start_indices[dimension] =
+        MultiplyAddDivideOffsetCalculation(
+            spmd_operand->shape().dimensions(dimension), offset, 1)
+            .Calculate(MakeTiledPartitionOrdinals(sharding, partition_id_,
+                                                  &b_)[dimension],
+                       &b_);
+    temp_output = b_.AddInstruction(HloInstruction::CreateDynamicUpdateSlice(
+        temp_output_shape, temp_output, spmd_operand, start_indices));
+    offset += operand->shape().dimensions(dimension);
+  }
+  auto all_reduce = collective_ops_creator_.create_cross_partition_all_reduce(
+      &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_),
+      NewChannel());
+  SetPartitionedHlo(hlo, [&] {
+    auto start_indices =
+        MakeTiledPartitionOrdinals(hlo->sharding(), partition_id_, &b_);
+    start_indices[dimension] = MultiplyAddDivideOffsetCalculation(
+                                   shard_shape.dimensions(dimension), 0, 1)
+                                   .Calculate(start_indices[dimension], &b_);
+    return b_.AddInstruction(HloInstruction::CreateDynamicSlice(
+        shard_shape, all_reduce, start_indices, shard_shape.dimensions()));
+  });
+
+  return Status::OK();
+}
+
+// If partitioning in the operand only happens in dimensions in passthrough
+// dimensions (offset dimensions in the gather output (or scatter update) that
+// have the same size as the operand), returns the corresponding output (or
+// update) sharding by passing through the input sharding.
+absl::optional<HloSharding> PassthroughOperandToGatherOutputOrScatterUpdate(
+    const PartitionedHlo& operand, const Shape& update_or_gather_shape,
+    absl::Span<const int64> collapsed_or_inserted_dims,
+    absl::Span<const int64> index_map,
+    absl::Span<const int64> offset_or_window_dims,
+    absl::Span<const int64> slice_size) {
+  if (operand.sharding().IsTileMaximal()) {
+    return operand.sharding();
+  }
+  std::vector<int64> passthrough_tile(update_or_gather_shape.rank(), 1);
+  int64 collapsed = 0;
+  for (int64 i = 0; i < operand.base_shape().rank(); ++i) {
+    int64 dim_partitions = operand.sharding().tile_assignment().dim(i);
+    if (absl::c_linear_search(collapsed_or_inserted_dims, i) ||
+        absl::c_linear_search(index_map, i)) {
+      if (dim_partitions > 1) {
+        return absl::nullopt;
+      }
+      collapsed++;
+      continue;
+    }
+    if (slice_size[i] != operand.base_shape().dimensions(i) &&
+        dim_partitions > 1) {
+      return absl::nullopt;
+    }
+    int64 offset_dim = offset_or_window_dims[i - collapsed];
+    if (i - collapsed > 0 &&
+        offset_dim < offset_or_window_dims[i - collapsed - 1]) {
+      // Output offsets are transposed, we do not support this case.
+      return absl::nullopt;
+    }
+    passthrough_tile[offset_dim] = dim_partitions;
+  }
+  Array<int64> tile_assignment = operand.sharding().tile_assignment();
+  tile_assignment.Reshape(passthrough_tile);
+  return HloSharding::Tile(tile_assignment);
+}
+
+// Returns whether partitioning in the operand only happens in dimensions with
+// gather/scatter slice size 1.
+bool GatherScatterOperandPartitionedOnlyOnTrivialSliceDims( + const PartitionedHlo& operand, absl::Span index_map, + absl::Span slice_size, int64 num_partitions) { + if (operand.sharding().IsTileMaximal()) { + return false; + } + int64 trivial_slice_dims_partitions = 1; + for (int64 dim : index_map) { + if (slice_size[dim] == 1) { + trivial_slice_dims_partitions *= + operand.sharding().tile_assignment().dim(dim); + } + } + return trivial_slice_dims_partitions == num_partitions; +} + +// Returns the min and max for the indices (replicated) in a scatter/gather +// which has the operand partitioned on trivial slice dimensions (slice size 1). +std::pair +IndexBoundsForGatherScatterOperandPartitionedOnTrivialSliceDims( + const PartitionedHlo& operand, const PartitionedHlo& replicated_indices, + HloInstruction* partition_id, absl::Span index_map, + int64 index_vector_dim, SpmdBuilder* b) { + auto operand_offsets = MakePartitionOffsets( + operand.base_shape(), operand.sharding(), partition_id, b); + // Find the per-dimension index bounds. + std::vector min_indices; + std::vector max_indices; + for (int64 i = 0; i < index_map.size(); ++i) { + int64 dim = index_map[i]; + int64 partitions = operand.sharding().tile_assignment().dim(dim); + if (partitions == 1) { + min_indices.push_back(CreateR0WithType( + replicated_indices.base_shape().element_type(), 0, b)); + max_indices.push_back(CreateR0WithType( + replicated_indices.base_shape().element_type(), + operand.base_shape().dimensions(dim), b)); + continue; + } + auto offset = operand_offsets[dim]; + if (offset->shape().element_type() != + replicated_indices.base_shape().element_type()) { + offset = b->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::MakeShape(replicated_indices.base_shape().element_type(), + {}), + offset)); + } + min_indices.push_back(offset); + auto partition_size_minus_1 = + CreateR0WithType(replicated_indices.base_shape().element_type(), + operand.hlo()->shape().dimensions(dim) - 1, b); + max_indices.push_back(b->AddInstruction(HloInstruction::CreateBinary( + offset->shape(), HloOpcode::kAdd, offset, partition_size_minus_1))); + } + // Broadcast the index bounds to the same shape as the indices. + HloInstruction* broadcast_min; + HloInstruction* broadcast_max; + if (index_vector_dim < replicated_indices.base_shape().rank()) { + // The index vector is an R1, we need to reshape individual bounds to + // [1], and concat them if there are more than one. 
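Stepping away from the broadcasting details for a moment, the bounds computed by the helper above reduce to simple shard arithmetic. A tiny sketch, assuming ceil-sized shards as produced by padded even partitioning (simplified types, not the XLA API):

#include <cstdint>
#include <iostream>
#include <utility>

// For an operand dimension of size dim_size split across `partitions` shards
// of ceil(dim_size / partitions) elements, partition p owns the inclusive
// index range [p * shard, p * shard + shard - 1].
std::pair<int64_t, int64_t> TrivialSliceIndexBounds(int64_t dim_size,
                                                    int64_t partitions,
                                                    int64_t p) {
  const int64_t shard = (dim_size + partitions - 1) / partitions;
  return {p * shard, p * shard + shard - 1};
}

int main() {
  for (int64_t p = 0; p < 4; ++p) {
    const auto bounds = TrivialSliceIndexBounds(/*dim_size=*/10,
                                                /*partitions=*/4, p);
    std::cout << "partition " << p << ": [" << bounds.first << ", "
              << bounds.second << "]\n";
  }
  return 0;
}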
+ for (int64 i = 0; i < min_indices.size(); ++i) { + min_indices[i] = b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(min_indices[i]->shape().element_type(), {1}), + min_indices[i])); + max_indices[i] = b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(max_indices[i]->shape().element_type(), {1}), + max_indices[i])); + } + int64 slice_dims = max_indices.size(); + if (slice_dims > 1) { + min_indices[0] = b->AddInstruction(HloInstruction::CreateConcatenate( + ShapeUtil::MakeShape(min_indices[0]->shape().element_type(), + {slice_dims}), + min_indices, 0)); + max_indices[0] = b->AddInstruction(HloInstruction::CreateConcatenate( + min_indices[0]->shape(), max_indices, 0)); + } + broadcast_min = b->AddInstruction(HloInstruction::CreateBroadcast( + replicated_indices.base_shape(), min_indices[0], {index_vector_dim})); + broadcast_max = b->AddInstruction(HloInstruction::CreateBroadcast( + replicated_indices.base_shape(), max_indices[0], {index_vector_dim})); + } else { + CHECK_EQ(max_indices.size(), 1); + broadcast_min = b->AddInstruction(HloInstruction::CreateBroadcast( + replicated_indices.base_shape(), min_indices[0], {})); + broadcast_max = b->AddInstruction(HloInstruction::CreateBroadcast( + replicated_indices.base_shape(), max_indices[0], {})); + } + return {broadcast_min, broadcast_max}; +} + +Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { + auto scatter = Cast(hlo); + auto dnums = scatter->scatter_dimension_numbers(); + auto operand = GetPartitionedHlo(scatter->operand(0)); + auto indices = GetPartitionedHlo(scatter->operand(1)); + auto updates = GetPartitionedHlo(scatter->operand(2)); + std::vector slice_size(operand.base_shape().rank(), 1); + int64 num_update_window_dims = 0; + for (int64 i = 0; i < operand.base_shape().rank(); ++i) { + if (absl::c_linear_search(dnums.inserted_window_dims(), i)) { + continue; + } + slice_size[i] = updates.base_shape().dimensions( + dnums.update_window_dims(num_update_window_dims++)); + } + std::vector inserted_window_dims(dnums.inserted_window_dims().begin(), + dnums.inserted_window_dims().end()); + std::vector scatter_dims_to_operand_dims( + dnums.scatter_dims_to_operand_dims().begin(), + dnums.scatter_dims_to_operand_dims().end()); + std::vector update_window_dims(dnums.update_window_dims().begin(), + dnums.update_window_dims().end()); + if (!operand.sharding().IsTileMaximal()) { + auto maybe_passthrough = PassthroughOperandToGatherOutputOrScatterUpdate( + operand, updates.base_shape(), inserted_window_dims, + scatter_dims_to_operand_dims, update_window_dims, slice_size); + // Handle pass through cases if we can use compatible sharding for update. 
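The pass-through case referenced in the comment above can be pictured with plain vectors. The sketch below is a simplified model of PassthroughOperandToGatherOutputOrScatterUpdate that drops the transposed-offset-dims check and substitutes std::optional and int64_t for the XLA types; all names are illustrative, not the real API:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Operand partitioning may only appear on dimensions that survive as
// full-size offset (window) dims in the gather output / scatter update;
// collapsed and index-mapped dims must be unpartitioned. Returns the output
// tile counts, or nullopt if pass-through is impossible.
std::optional<std::vector<int64_t>> PassthroughTile(
    const std::vector<int64_t>& operand_tile,    // partitions per operand dim
    const std::vector<int64_t>& operand_dims,    // operand dimension sizes
    const std::vector<int64_t>& slice_sizes,     // gather slice sizes
    const std::vector<int64_t>& collapsed_dims,  // collapsed slice dims
    const std::vector<int64_t>& index_map,       // start_index_map
    const std::vector<int64_t>& offset_dims,     // output offset dims
    int64_t output_rank) {
  std::vector<int64_t> tile(output_rank, 1);
  int64_t collapsed = 0;
  auto contains = [](const std::vector<int64_t>& v, int64_t x) {
    return std::find(v.begin(), v.end(), x) != v.end();
  };
  for (int64_t i = 0; i < static_cast<int64_t>(operand_tile.size()); ++i) {
    if (contains(collapsed_dims, i) || contains(index_map, i)) {
      if (operand_tile[i] > 1) return std::nullopt;
      ++collapsed;
      continue;
    }
    if (slice_sizes[i] != operand_dims[i] && operand_tile[i] > 1)
      return std::nullopt;
    tile[offset_dims[i - collapsed]] = operand_tile[i];
  }
  return tile;
}

int main() {
  // Operand [8, 16] partitioned 2 ways on dim 1; dim 0 is collapsed & indexed.
  auto tile = PassthroughTile({1, 2}, {8, 16}, {1, 16}, {0}, {0}, {1}, 2);
  if (tile) {
    std::cout << "output tile:";
    for (auto t : *tile) std::cout << " " << t;  // expect: 1 2
    std::cout << "\n";
  }
  return 0;
}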
+ if (maybe_passthrough.has_value()) { + indices = indices.Reshard(HloSharding::Replicate()); + updates = updates.Reshard(*maybe_passthrough); + auto pscatter = b_.AddInstruction(HloInstruction::CreateScatter( + operand.hlo()->shape(), operand.hlo(), indices.hlo(), updates.hlo(), + scatter->to_apply(), dnums, scatter->indices_are_sorted(), + scatter->unique_indices())); + pscatter->set_sharding(*maybe_passthrough); + SetPartitionedHlo(hlo, [&]() { + return PartitionedHlo(pscatter, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + if (GatherScatterOperandPartitionedOnlyOnTrivialSliceDims( + operand, scatter_dims_to_operand_dims, slice_size, + num_partitions_) && + ShapeUtil::ByteSizeOf(updates.base_shape()) < + ShapeUtil::ByteSizeOf(scatter->shape())) { + // Operand is sharded on trivial slice dims (update slice size 1). We can + // adjust the indices on each partition by subtracting the offsets. Then + // we execute a scatter on full updated indices, and out-of-bound accesses + // will have no effect on the result as guaranteed by the scatter + // semantics. + indices = indices.Reshard(HloSharding::Replicate()); + updates = updates.Reshard(HloSharding::Replicate()); + HloInstruction* indices_min; + HloInstruction* indices_max_unused; + std::tie(indices_min, indices_max_unused) = + IndexBoundsForGatherScatterOperandPartitionedOnTrivialSliceDims( + operand, indices, partition_id_, scatter_dims_to_operand_dims, + dnums.index_vector_dim(), &b_); + auto adjusted_indices = b_.AddInstruction(HloInstruction::CreateBinary( + indices.hlo()->shape(), HloOpcode::kSubtract, indices.hlo(), + indices_min)); + auto pscatter = b_.AddInstruction(HloInstruction::CreateScatter( + operand.hlo()->shape(), operand.hlo(), adjusted_indices, + updates.hlo(), scatter->to_apply(), dnums, + scatter->indices_are_sorted(), scatter->unique_indices())); + pscatter->set_sharding(operand.sharding()); + SetPartitionedHlo(hlo, [&]() { + return PartitionedHlo(pscatter, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleSlice(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + + auto operand = GetPartitionedHlo(hlo->operand(0)).Reshard(sharding); + + // Create a window config to represent the slice. 
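The window built in the next lines encodes a static slice as a size-1 window: negative low padding drops the leading elements and the high padding trims everything past the limit. A small sketch of that per-dimension encoding (hypothetical struct, illustrative only):

#include <cstdint>
#include <iostream>

// For one dimension of a slice [start, limit) with the given stride over an
// operand of size operand_dim, the equivalent size-1 window uses:
//   padding_low  = -start               (drop the first `start` elements)
//   padding_high = limit - operand_dim  (trim everything past `limit`)
// and keeps the stride unchanged.
struct WindowDim {
  int64_t padding_low;
  int64_t padding_high;
  int64_t stride;
};

WindowDim SliceAsWindow(int64_t start, int64_t limit, int64_t stride,
                        int64_t operand_dim) {
  return WindowDim{-start, limit - operand_dim, stride};
}

int main() {
  // Slice [2, 9) with stride 1 of a dimension of size 12.
  const WindowDim w = SliceAsWindow(2, 9, 1, 12);
  std::cout << "padding_low=" << w.padding_low
            << " padding_high=" << w.padding_high << " stride=" << w.stride
            << "\n";  // padding_low=-2 padding_high=-3 stride=1
  return 0;
}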
+ Window window; + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + WindowDimension* dim = window.add_dimensions(); + dim->set_size(1); + dim->set_stride(hlo->slice_strides(i)); + dim->set_window_dilation(1); + dim->set_window_reversal(false); + dim->set_padding_low(-hlo->slice_starts(i)); + dim->set_padding_high(hlo->slice_limits(i) - + hlo->operand(0)->shape().dimensions(i)); + dim->set_base_dilation(1); + } + + auto reshard_operand = operand.ReshardAsWindowedInput( + window, sharding, + CreateZero(ShapeUtil::MakeShape(hlo->shape().element_type(), {}), &b_), + /*mask_invalid_region=*/false); + if (!reshard_operand.has_value()) { + return DefaultAction(hlo); + } + TF_RET_CHECK(!reshard_operand->dynamic_slice_index_on_output.has_value()); + const Shape& operand_shape = reshard_operand->sharded_input->shape(); + + std::vector start_indices = hlo->slice_starts(); + std::vector limit_indices = hlo->slice_limits(); + std::vector strides = hlo->slice_strides(); + bool need_slice = false; + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + auto dim = reshard_operand->shard_window.dimensions(i); + start_indices[i] = -dim.padding_low(); + limit_indices[i] = operand_shape.dimensions(i) + dim.padding_high(); + if (start_indices[i] != 0 || strides[i] != 1 || + limit_indices[i] != operand_shape.dimensions(i)) { + need_slice = true; + } + } + + SetPartitionedHlo(hlo, [&] { + if (need_slice) { + auto shard_shape = MakePartitionedShape(hlo->shape(), sharding); + return b_.AddInstruction(HloInstruction::CreateSlice( + shard_shape, reshard_operand->sharded_input, start_indices, + limit_indices, strides)); + } + return reshard_operand->sharded_input; + }); + + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleSort(HloInstruction* hlo) { + HloSharding sharding = hlo->sharding(); + if (hlo->shape().IsTuple()) { + // Check that all elements are sharded in the same way. + if (hlo->shape().tuple_shapes_size() == 0) { + return DefaultAction(hlo); + } + sharding = hlo->sharding().GetSubSharding(hlo->shape(), {0}); + for (int64 i = 1; i < hlo->operand_count(); ++i) { + if (sharding != hlo->sharding().GetSubSharding(hlo->shape(), {i})) { + return DefaultAction(hlo); + } + } + } + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + for (int64 dim : hlo->dimensions()) { + if (sharding.tile_assignment().dim(dim) > 1) { + return DefaultAction(hlo); + } + } + // Reshard operands to the same as the output. + std::vector new_operands; + for (HloInstruction* operand : hlo->operands()) { + new_operands.push_back(GetPartitionedHlo(operand).Reshard(sharding).hlo()); + } + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(hlo->CloneWithNewOperands( + MakePartitionedShape(hlo->shape(), hlo->sharding()), new_operands)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleCustomCall(HloInstruction* hlo) { + if (hlo->custom_call_target() == "SPMDFullToShardShape") { + // This op switches from auto partitioning to manual partitioning. 
+ auto input_partitioned = GetPartitionedHlo(hlo->operand(0)); + if (!EvenlyPartitions(hlo->shape(), input_partitioned.sharding())) { + input_partitioned = input_partitioned.PadWithValue( + CreateR0WithType(hlo->shape().element_type(), 0, &b_)); + } + auto input = input_partitioned.hlo(); + CHECK(hlo->sharding().IsReplicated()); + CHECK(ShapeUtil::Compatible(input->shape(), hlo->shape())); + auto copy = b_.AddInstruction( + HloInstruction::CreateUnary(input->shape(), HloOpcode::kCopy, input)); + SetPartitionedHlo(hlo, [&] { return copy; }); + return Status::OK(); + } + if (hlo->custom_call_target() == "SPMDShardToFullShape") { + // This op switches from manual partitioning to auto partitioning. + auto input = GetPartitionedHlo(hlo->operand(0)).hlo(); + CHECK(input->sharding().IsReplicated()); + auto copy = b_.AddInstruction( + HloInstruction::CreateUnary(input->shape(), HloOpcode::kCopy, input)); + CHECK(ShapeUtil::Compatible( + copy->shape(), MakePartitionedShape(hlo->shape(), hlo->sharding()))); + SetPartitionedHlo(hlo, [&] { return copy; }); + return Status::OK(); + } + if (hlo->custom_call_target() != "TopK") { + return DefaultAction(hlo); + } + + if (!hlo->operand(0)->has_sharding()) { + return DefaultAction(hlo); + } + + const HloSharding& sharding = hlo->operand(0)->sharding(); + if (sharding.IsTileMaximal() || sharding.IsReplicated()) { + return DefaultAction(hlo); + } + + const int64 sort_dim = 1; + const int64 shard_count = sharding.tile_assignment().dim(sort_dim); + + if (shard_count <= 1) { + return DefaultAction(hlo); + } + + const int64 input_size = hlo->operand(0)->shape().dimensions(sort_dim); + const int64 batch_size = hlo->shape().tuple_shapes(0).dimensions(0); + const int64 k = hlo->shape().tuple_shapes(0).dimensions(sort_dim); + const int64 per_partition_size = CeilOfRatio(input_size, shard_count); + + if (k >= per_partition_size) { + return DefaultAction(hlo); + } + + auto input = hlo->operand(0); + const auto element_type = input->shape().element_type(); + + // Pad input with minimal value. + auto min_value = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::MinValue(element_type))); + // TODO(wangtao): add test to see if -NaN < -Inf in BF16. + if (element_type == F32) { + auto float_pad_value = std::numeric_limits::quiet_NaN(); + min_value = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(-float_pad_value))); + } + auto partitioned_input = GetPartitionedHlo(input).PadWithValue(min_value); + + // Each partition needs to do TopK separately, thus the base shape + // becomes [batch_size, k * shard_count]. + const Shape replicated_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(hlo->operand(0)->shape().element_type(), + {batch_size, k * shard_count}), + ShapeUtil::MakeShape(S32, {batch_size, k * shard_count})}); + auto custom_call_sharding = + sharding.GetTupleSharding(replicated_shape).ValueOrDie(); + auto shard_shape = + MakePartitionedShape(replicated_shape, custom_call_sharding); + auto topk = b_.AddInstruction( + hlo->CloneWithNewOperands(shard_shape, {partitioned_input.hlo()})); + topk->set_sharding(custom_call_sharding); + // Partition customcall. + PartitionedHlo partitioned_topk(topk, replicated_shape, + MakePartitioningState()); + topk = partitioned_topk.hlo(); + + // Get value from TopK. + HloInstruction* value_gte = + b_.AddInstruction(HloInstruction::CreateGetTupleElement( + topk->shape().tuple_shapes(0), topk, 0)); + value_gte->set_sharding(sharding); + // Partition GetTupleElement of value. 
+ PartitionedHlo value_partitioned_gte( + value_gte, partitioned_topk.base_shape().tuple_shapes(0), + MakePartitioningState()); + // Reshard value to be replicated. + auto replicated_value_gte = + value_partitioned_gte.Reshard(HloSharding::Replicate()).hlo(); + + // Get index from TopK. + HloInstruction* index_gte = + b_.AddInstruction(HloInstruction::CreateGetTupleElement( + topk->shape().tuple_shapes(1), topk, 1)); + auto partition_id_s32 = b_.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::MakeShape(S32, partition_id_->shape().dimensions()), + partition_id_)); + // Add per partition offset to index, index returned from CustomCall always + // starts from 0. + auto index_offset = b_.AddInstruction(HloInstruction::CreateBroadcast( + index_gte->shape(), + b_.AddInstruction(HloInstruction::CreateBinary( + partition_id_s32->shape(), HloOpcode::kMultiply, partition_id_s32, + b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(per_partition_size))))), + {})); + index_gte = b_.AddInstruction(HloInstruction::CreateBinary( + index_offset->shape(), HloOpcode::kAdd, index_gte, index_offset)); + index_gte->set_sharding(sharding); + // Parttion GetTupleElement of index. + PartitionedHlo index_partitioned_gte( + index_gte, partitioned_topk.base_shape().tuple_shapes(1), + MakePartitioningState()); + // Reshard index to be replicated. + auto replicated_index_gte = + index_partitioned_gte.Reshard(HloSharding::Replicate()).hlo(); + + // Creates replicated sort to do TopK, the input is value and index pairs + // from all the partitions. The reason to use Sort instead of CustomCall TopK + // is CustomCall only takes value as input. There will be an extra Gather + // to get the correct index if CustomCall is used here. + + // Create comparator for the sort. + XlaBuilder b("Sort.Compare"); + XlaComputation comparator = CreateScalarComparisonComputation( + "compare-value-and-index", {input->shape().element_type(), S32}, {Gt, Lt}, + &b); + TF_ASSIGN_OR_RETURN(ProgramShape program_shape, comparator.GetProgramShape()); + HloModuleConfig config(program_shape); + TF_ASSIGN_OR_RETURN(auto new_module, + HloModule::CreateFromProto(comparator.proto(), config)); + HloCloneContext context(module_); + auto compare_computation = + module_->DeepCloneComputation(new_module->entry_computation(), &context); + auto sort = b_.AddInstruction(HloInstruction::CreateSort( + replicated_shape, sort_dim, {replicated_value_gte, replicated_index_gte}, + compare_computation, true)); + sort->set_sharding( + HloSharding::Replicate().GetTupleSharding(sort->shape()).ValueOrDie()); + PartitionedHlo replicated_sort(sort, replicated_shape, + MakePartitioningState()); + + // Slice value and index from top-k for output. + HloInstruction* sort_value_gte = + b_.AddInstruction(HloInstruction::CreateGetTupleElement( + replicated_sort.hlo()->shape().tuple_shapes(0), replicated_sort.hlo(), + 0)); + HloInstruction* sort_index_gte = + b_.AddInstruction(HloInstruction::CreateGetTupleElement( + replicated_sort.hlo()->shape().tuple_shapes(1), replicated_sort.hlo(), + 1)); + const Shape& hlo_shape = sort_value_gte->shape(); + auto hlo_dims = hlo_shape.dimensions(); + std::vector start_indices(hlo_shape.dimensions_size(), 0); + std::vector limit_indices(hlo_dims.begin(), hlo_dims.end()); + std::vector strides(hlo_shape.dimensions_size(), sort_dim); + limit_indices[sort_dim] = k; + auto output_shape = hlo_shape; + output_shape.set_dimensions(sort_dim, k); + // Slice value from final sort. 
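Summarizing the TopK strategy above before the final slicing: each partition takes a local top-k, shifts its local indices by partition_id * per_partition_size, and a global sort over the k * shard_count candidates produces the answer. A compact simulation for a single batch row, with std::partial_sort standing in for both the per-shard CustomCall and the replicated sort (plain C++, not the XLA API; index tie-breaking is ignored):

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

using ValueIndex = std::pair<float, int64_t>;  // (value, global index)

// Local top-k of one partition's shard, with indices shifted to global ones.
std::vector<ValueIndex> LocalTopK(const std::vector<float>& shard, int64_t k,
                                  int64_t partition_id,
                                  int64_t per_partition_size) {
  std::vector<ValueIndex> candidates;
  for (int64_t i = 0; i < static_cast<int64_t>(shard.size()); ++i) {
    candidates.emplace_back(shard[i], partition_id * per_partition_size + i);
  }
  std::partial_sort(candidates.begin(), candidates.begin() + k,
                    candidates.end(), std::greater<ValueIndex>());
  candidates.resize(k);
  return candidates;
}

int main() {
  const int64_t k = 2, per_partition_size = 4;
  const std::vector<std::vector<float>> shards = {{0.1f, 9.0f, 3.0f, 2.0f},
                                                  {8.0f, 0.5f, 7.0f, 1.0f}};
  // Gather k candidates per partition, then resolve them with a global sort
  // (the replicated sort over value/index pairs in the code above).
  std::vector<ValueIndex> merged;
  for (int64_t p = 0; p < static_cast<int64_t>(shards.size()); ++p) {
    const auto local = LocalTopK(shards[p], k, p, per_partition_size);
    merged.insert(merged.end(), local.begin(), local.end());
  }
  std::partial_sort(merged.begin(), merged.begin() + k, merged.end(),
                    std::greater<ValueIndex>());
  for (int64_t i = 0; i < k; ++i) {
    std::cout << "value=" << merged[i].first << " index=" << merged[i].second
              << "\n";  // 9 @ index 1, then 8 @ index 4
  }
  return 0;
}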
+ HloInstruction* slice_sort_value = + b_.AddInstruction(HloInstruction::CreateSlice( + output_shape, sort_value_gte, start_indices, limit_indices, strides)); + // Slice index from final sort. + auto index_output_shape = sort_index_gte->shape(); + index_output_shape.set_dimensions(sort_dim, k); + HloInstruction* slice_index_value = b_.AddInstruction( + HloInstruction::CreateSlice(index_output_shape, sort_index_gte, + start_indices, limit_indices, strides)); + auto create_tuple = b_.AddInstruction( + HloInstruction::CreateTuple({slice_sort_value, slice_index_value})); + create_tuple->set_sharding(HloSharding::Replicate()); + + SetPartitionedHlo(hlo, PartitionedHlo(create_tuple, create_tuple->shape(), + MakePartitioningState()) + .Reshard(hlo->sharding())); + + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleTranspose(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + + std::vector inverse_dimensions(hlo->shape().rank()); + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + inverse_dimensions[hlo->dimensions(i)] = i; + } + auto desired_operand_sharding = + hlo_sharding_util::TransposeSharding(sharding, inverse_dimensions); + + auto operand = GetPartitionedHlo(hlo->operand(0)) + .Reshard(desired_operand_sharding) + .hlo(); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(hlo->CloneWithNewOperands( + MakePartitionedShape(hlo->shape(), hlo->sharding()), {operand})); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleReshape(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + + auto operand = GetPartitionedHlo(hlo->operand(0)); + // The output shape is the source and the operand shape is the target to get + // the aligned sharding for the operand. + auto desired_operand_sharding = hlo_sharding_util::ReshapeSharding( + hlo->shape(), hlo->operand(0)->shape(), hlo->sharding()); + if (desired_operand_sharding.has_value()) { + auto operand_hlo = operand.Reshard(*desired_operand_sharding).hlo(); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(hlo->CloneWithNewOperands( + MakePartitionedShape(hlo->shape(), hlo->sharding()), {operand_hlo})); + }); + return Status::OK(); + } + + // Try use halo exchange for certain split-dim/merge-dims cases. + // ReshapeSharding failed in these cases probably due to uneven partitioning, + // where halo exchange could help. Specifically we check the following + // conditions to detect supported cases: + // 1) Both input and output are partitioned on one dimension. + // 2) The combined size of dimensions before the partitioned dimension are the + // same on input and output. This means we don't need to consider the major + // dimensions. + // 3) Let A = the input size on the partitioned dimension, and + // B = the output size on the partitioned dimension; then + // either A % B == 0 (split dim) or B % A == 0 (merge dims). + auto maybe_input_sharded_dim = UniqueTiledDim(operand.sharding()); + auto maybe_output_sharded_dim = UniqueTiledDim(sharding); + if (!maybe_input_sharded_dim || !maybe_output_sharded_dim) { + return DefaultAction(hlo); + } + int64 input_sharded_dim = *maybe_input_sharded_dim; + int64 output_sharded_dim = *maybe_output_sharded_dim; + // Check that the major dims before the sharded dim have the same total size + // for input and output. 
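In isolation, the checks performed below amount to: both sides tiled on one dimension, equal products of the dimensions in front of it, and one tiled size dividing the other. A small sketch of that classification with shapes as plain dimension vectors (hypothetical helper, illustrative only):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Classify a reshape whose input is tiled on `in_dim` and whose output is
// tiled on `out_dim` as "split", "merge", or "unsupported", mirroring the
// conditions checked in HandleReshape above.
std::string ClassifyReshape(const std::vector<int64_t>& in_dims, int64_t in_dim,
                            const std::vector<int64_t>& out_dims,
                            int64_t out_dim) {
  int64_t in_major = 1, out_major = 1;
  for (int64_t i = 0; i < in_dim; ++i) in_major *= in_dims[i];
  for (int64_t i = 0; i < out_dim; ++i) out_major *= out_dims[i];
  if (in_major != out_major) return "unsupported";
  if (in_dims[in_dim] % out_dims[out_dim] == 0) return "split";
  if (out_dims[out_dim] % in_dims[in_dim] == 0) return "merge";
  return "unsupported";
}

int main() {
  // [6, 8] -> [6, 2, 4], both tiled on the size-8 / size-2 dimension.
  std::cout << ClassifyReshape({6, 8}, 1, {6, 2, 4}, 1) << "\n";  // split
  // [6, 2, 4] -> [6, 8].
  std::cout << ClassifyReshape({6, 2, 4}, 1, {6, 8}, 1) << "\n";  // merge
  return 0;
}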
+ int64 input_major_dims_size = 1; + for (int64 i = 0; i < input_sharded_dim; ++i) { + input_major_dims_size *= operand.base_shape().dimensions(i); + } + int64 output_major_dims_size = 1; + for (int64 i = 0; i < output_sharded_dim; ++i) { + output_major_dims_size *= hlo->shape().dimensions(i); + } + if (input_major_dims_size != output_major_dims_size) { + return DefaultAction(hlo); + } + // Fix potential device ordering mismatch in tile assignment. + Array new_input_tile_assignment = sharding.tile_assignment(); + new_input_tile_assignment.Reshape( + operand.sharding().tile_assignment().dimensions()); + operand = operand.Reshard(HloSharding::Tile(new_input_tile_assignment)); + + int64 input_dim_size = operand.base_shape().dimensions(input_sharded_dim); + int64 output_dim_size = hlo->shape().dimensions(output_sharded_dim); + auto input_shard_shape = + MakePartitionedShape(operand.base_shape(), operand.sharding()); + auto output_shard_shape = MakePartitionedShape(hlo->shape(), sharding); + if (input_dim_size % output_dim_size == 0) { + // Split dim. + int64 split_factor = input_dim_size / output_dim_size; + int64 output_shard_size = output_shard_shape.dimensions(output_sharded_dim); + // Use halo exchange to fix misaligned data. + Window window; + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + WindowDimension* dim = window.add_dimensions(); + dim->set_size(1); + dim->set_stride(1); + dim->set_window_dilation(1); + dim->set_window_reversal(false); + dim->set_base_dilation(1); + dim->set_padding_low(0); + if (i == input_sharded_dim) { + dim->set_padding_high(output_shard_size * split_factor * + num_partitions_ - + input_dim_size); + } else { + dim->set_padding_high(0); + } + } + + auto reshard_operand = operand.ReshardAsWindowedInput( + window, operand.sharding(), + CreateZero(ShapeUtil::MakeShape(hlo->shape().element_type(), {}), &b_), + /*mask_invalid_region=*/false); + if (!reshard_operand.has_value()) { + return DefaultAction(hlo); + } + TF_RET_CHECK(!reshard_operand->dynamic_slice_index_on_output.has_value()); + CHECK_EQ( + reshard_operand->sharded_input->shape().dimensions(input_sharded_dim), + output_shard_size * split_factor); + SetPartitionedHlo(hlo, [&] { + // Do a local reshape. + return b_.AddInstruction(HloInstruction::CreateReshape( + output_shard_shape, reshard_operand->sharded_input)); + }); + return Status::OK(); + } else if (output_dim_size % input_dim_size == 0) { + // Merge dims. + int64 merge_factor = output_dim_size / input_dim_size; + // First reshape locally. (The sharded dimension could include padded data.) + auto tmp_shard_shape = output_shard_shape; + tmp_shard_shape.set_dimensions( + output_sharded_dim, + input_shard_shape.dimensions(input_sharded_dim) * merge_factor); + auto tmp_reshape = b_.AddInstruction( + HloInstruction::CreateReshape(tmp_shard_shape, operand.hlo())); + tmp_reshape->set_metadata(hlo->metadata()); + tmp_reshape->set_sharding(hlo->sharding()); + auto tmp_full_shape = tmp_shard_shape; + tmp_full_shape.set_dimensions( + output_sharded_dim, + tmp_shard_shape.dimensions(output_sharded_dim) * num_partitions_); + auto tmp_output = + PartitionedHlo(tmp_reshape, tmp_full_shape, MakePartitioningState()); + + // Use halo exchange to fix misaligned data. 
+ Window window; + for (int64 i = 0; i < tmp_shard_shape.rank(); ++i) { + WindowDimension* dim = window.add_dimensions(); + dim->set_size(1); + dim->set_stride(1); + dim->set_window_dilation(1); + dim->set_window_reversal(false); + dim->set_base_dilation(1); + dim->set_padding_low(0); + if (i == output_sharded_dim) { + dim->set_padding_high(output_dim_size - + tmp_shard_shape.dimensions(output_sharded_dim) * + num_partitions_); + } else { + dim->set_padding_high(0); + } + } + + auto reshard_output = tmp_output.ReshardAsWindowedInput( + window, sharding, + CreateZero(ShapeUtil::MakeShape(hlo->shape().element_type(), {}), &b_), + /*mask_invalid_region=*/false); + if (!reshard_output.has_value()) { + return DefaultAction(hlo); + } + TF_RET_CHECK(!reshard_output->dynamic_slice_index_on_output.has_value()); + CHECK_EQ( + reshard_output->sharded_input->shape().dimensions(output_sharded_dim), + output_shard_shape.dimensions(output_sharded_dim)); + SetPartitionedHlo(hlo, [&] { return reshard_output->sharded_input; }); + return Status::OK(); + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleIota(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + if (sharding.IsTileMaximal()) { + return DefaultAction(hlo); + } + + SetPartitionedHlo(hlo, [&] { + int64 dimension = Cast(hlo)->iota_dimension(); + auto iota = b_.AddInstruction(HloInstruction::CreateIota( + MakePartitionedShape(hlo->shape(), sharding), dimension)); + + if (sharding.tile_assignment().dim(dimension) > 1) { + auto partition_ordinals = + MakeTiledPartitionOrdinals(sharding, partition_id_, &b_); + auto multiplier = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(iota->shape().dimensions(dimension)))); + auto offset = b_.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kMultiply, + partition_ordinals[dimension], multiplier)); + if (iota->shape().element_type() != S32) { + offset = b_.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::MakeShape(iota->shape().element_type(), {}), offset)); + } + auto broadcast = b_.AddInstruction( + HloInstruction::CreateBroadcast(iota->shape(), offset, {})); + return b_.AddInstruction(HloInstruction::CreateBinary( + iota->shape(), HloOpcode::kAdd, iota, broadcast)); + } + + return iota; + }); + + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleSingleDevice(const HloInstruction* hlo) { + TF_RET_CHECK(hlo->sharding().HasUniqueDevice()); + int64 device = hlo->sharding().GetUniqueDevice(); + const HloSharding sharding = HloSharding::AssignDevice(device); + + std::vector operands; + std::vector operand_shapes; + for (const HloInstruction* operand : hlo->operands()) { + operands.push_back(GetPartitionedHlo(operand).Reshard(sharding).hlo()); + operand_shapes.push_back(operand->shape()); + } + auto operand = b_.AddInstruction(HloInstruction::CreateTuple(operands)); + auto operand_shape = ShapeUtil::MakeTupleShape(operand_shapes); + + auto on_device = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(device))); + auto pred = b_.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), partition_id_, on_device, + ComparisonDirection::kEq)); + + SpmdBuilder true_b("true_computation", visiting_hlo_); + HloComputation* true_computation; + { + auto param = true_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, operand_shape, "true_branch_param")); + std::vector new_operands; + for (int64 i = 0; i < operands.size(); 
++i) { + new_operands.push_back(true_b.AddInstruction( + HloInstruction::CreateGetTupleElement(operand_shapes[i], param, i))); + } + auto root = true_b.AddInstruction( + hlo->CloneWithNewOperands(hlo->shape(), new_operands)); + true_computation = module_->AddEmbeddedComputation(true_b.Build(root)); + } + + SpmdBuilder false_b("false_computation", visiting_hlo_); + HloComputation* false_computation; + { + false_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, operand_shape, "false_branch_param")); + auto root = CreateZero(hlo->shape(), &false_b); + false_computation = module_->AddEmbeddedComputation(false_b.Build(root)); + } + + SetPartitionedHlo(hlo, [&]() { + return b_.AddInstruction(HloInstruction::CreateConditional( + hlo->shape(), pred, operand, true_computation, operand, + false_computation)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleAllReduce(HloInstruction* hlo) { + if (hlo->IsCrossReplicaAllReduce() && hlo->operand_count() == 1) { + return HandleElementwise(hlo); + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleBroadcast(HloInstruction* hlo) { + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + + auto& operand = GetPartitionedHlo(hlo->operand(0)); + + // Tiled output. + std::vector wanted_input_tile_size(operand.base_shape().rank()); + std::vector sharded_new_dims; + for (int64 i = 0; i < operand.base_shape().rank(); ++i) { + wanted_input_tile_size[i] = + hlo->sharding().tile_assignment().dim(hlo->dimensions(i)); + } + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + if (!absl::c_linear_search(hlo->dimensions(), i) && + hlo->sharding().tile_assignment().dim(i) > 1) { + sharded_new_dims.push_back(i); + } + } + if (sharded_new_dims.empty()) { + // The new dimensions are replicated, so that we can do the adjustment on + // the input. + Array wanted_input_tile_assignment(wanted_input_tile_size); + wanted_input_tile_assignment.Each( + [&](absl::Span indices, int64* val) { + std::vector indices_in_broadcast(hlo->shape().rank(), 0); + for (int64 i = 0; i < operand.base_shape().rank(); ++i) { + indices_in_broadcast[hlo->dimensions(i)] = indices[i]; + } + *val = hlo->sharding().tile_assignment()(indices_in_broadcast); + }); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(hlo->CloneWithNewOperands( + MakePartitionedShape(hlo->shape(), hlo->sharding()), + {operand.Reshard(HloSharding::Tile(wanted_input_tile_assignment)) + .hlo()})); + }); + } else { + auto input = operand.Reshard(HloSharding::Replicate()).hlo(); + // We pad and shard the input first, then broadcast to the final shard + // shape. + auto output_offsets = + MakePartitionOffsets(hlo->shape(), hlo->sharding(), partition_id_, &b_); + std::vector input_offsets(operand.base_shape().rank()); + auto output_shard_shape = + MakePartitionedShape(hlo->shape(), hlo->sharding()); + auto input_shard_shape = input->shape(); + auto padded_input_shape = input->shape(); + for (int64 i = 0; i < input_offsets.size(); ++i) { + input_offsets[i] = output_offsets[hlo->dimensions(i)]; + input_shard_shape.set_dimensions( + i, output_shard_shape.dimensions(hlo->dimensions(i))); + padded_input_shape.set_dimensions( + i, hlo->sharding().tile_assignment().dim(hlo->dimensions(i)) * + input_shard_shape.dimensions(i)); + } + auto padded_input = PadToShape(input, padded_input_shape, &b_); + auto input_shard = + ShapeUtil::Compatible(input_shard_shape, padded_input->shape()) + ? 
padded_input + : b_.AddInstruction(HloInstruction::CreateDynamicSlice( + input_shard_shape, padded_input, input_offsets, + input_shard_shape.dimensions())); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction( + hlo->CloneWithNewOperands(output_shard_shape, {input_shard})); + }); + } + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConstant(HloInstruction* hlo) { + const Literal& literal = hlo->literal(); + if (literal.shape().IsTuple() || + (!hlo->sharding().IsTileMaximal() && + (!EvenlyPartitions(hlo->shape(), hlo->sharding()) || + !literal.IsAllFirst()))) { + return DefaultAction(hlo); + } + + SetPartitionedHlo(hlo, [&]() { + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + std::vector start_indices(hlo->shape().rank(), 0); + auto constant = b_.AddInstruction(HloInstruction::CreateConstant( + literal.Slice(start_indices, shard_shape.dimensions()))); + *constant->mutable_shape() = shard_shape; + return constant; + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleDynamicSlice(HloInstruction* hlo) { + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + if (hlo->sharding().tile_assignment().dim(i) != 1 && + (hlo->dynamic_slice_sizes()[i] != hlo->shape().dimensions(i) || + !hlo->operand(i + 1)->IsConstant() || + !hlo->operand(i + 1)->literal().IsZero({}))) { + // We currently do not partition the sliced dimensions. + return DefaultAction(hlo); + } + } + std::vector new_indices(hlo->shape().rank()); + auto new_input = + GetPartitionedHlo(hlo->operand(0)).Reshard(hlo->sharding()).hlo(); + for (int64 i = 0; i < new_indices.size(); ++i) { + // Replicate the indices. + new_indices[i] = GetPartitionedHlo(hlo->operand(i + 1)) + .Reshard(HloSharding::Replicate()) + .hlo(); + } + SetPartitionedHlo(hlo, [&]() { + auto partitioned_shape = + MakePartitionedShape(hlo->shape(), hlo->sharding()); + return b_.AddInstruction(HloInstruction::CreateDynamicSlice( + partitioned_shape, new_input, new_indices, + partitioned_shape.dimensions())); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleDynamicUpdateSlice(HloInstruction* hlo) { + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + if (hlo->sharding().tile_assignment().dim(i) != 1 && + (hlo->operand(1)->shape().dimensions(i) != hlo->shape().dimensions(i) || + !hlo->operand(i + 2)->IsConstant() || + !hlo->operand(i + 2)->literal().IsZero({}))) { + // We currently do not partition the sliced dimensions. + return DefaultAction(hlo); + } + } + std::vector new_indices(hlo->shape().rank()); + auto new_input = + GetPartitionedHlo(hlo->operand(0)).Reshard(hlo->sharding()).hlo(); + auto new_update = + GetPartitionedHlo(hlo->operand(1)).Reshard(hlo->sharding()).hlo(); + for (int64 i = 0; i < new_indices.size(); ++i) { + // Replicate the indices. 
+ new_indices[i] = GetPartitionedHlo(hlo->operand(i + 2)) + .Reshard(HloSharding::Replicate()) + .hlo(); + } + SetPartitionedHlo(hlo, [&]() { + auto partitioned_shape = + MakePartitionedShape(hlo->shape(), hlo->sharding()); + return b_.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + partitioned_shape, new_input, new_update, new_indices)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { + auto gather = Cast(hlo); + const auto& dnums = gather->gather_dimension_numbers(); + auto operand = GetPartitionedHlo(gather->operand(0)); + auto indices = GetPartitionedHlo(gather->operand(1)); + std::vector collapsed_slice_dims(dnums.collapsed_slice_dims().begin(), + dnums.collapsed_slice_dims().end()); + std::vector start_index_map(dnums.start_index_map().begin(), + dnums.start_index_map().end()); + std::vector offset_dims(dnums.offset_dims().begin(), + dnums.offset_dims().end()); + if (!operand.sharding().IsTileMaximal()) { + auto maybe_passthrough = PassthroughOperandToGatherOutputOrScatterUpdate( + operand, gather->shape(), collapsed_slice_dims, start_index_map, + offset_dims, gather->gather_slice_sizes()); + if (maybe_passthrough.has_value()) { + indices = indices.Reshard(HloSharding::Replicate()); + auto pshape = MakePartitionedShape(gather->shape(), *maybe_passthrough); + std::vector pslice_sizes(gather->gather_slice_sizes().begin(), + gather->gather_slice_sizes().end()); + for (int64 i = 0; i < pslice_sizes.size(); ++i) { + if (operand.sharding().tile_assignment().dim(i) > 1) { + pslice_sizes[i] = operand.hlo()->shape().dimensions(i); + } + } + auto pgather = b_.AddInstruction(HloInstruction::CreateGather( + pshape, operand.hlo(), indices.hlo(), dnums, pslice_sizes, + gather->indices_are_sorted())); + pgather->set_sharding(*maybe_passthrough); + SetPartitionedHlo(hlo, [&]() { + return PartitionedHlo(pgather, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + if (GatherScatterOperandPartitionedOnlyOnTrivialSliceDims( + operand, start_index_map, gather->gather_slice_sizes(), + num_partitions_) && + ShapeUtil::ByteSizeOf(gather->shape()) < + ShapeUtil::ByteSizeOf(gather->operand(0)->shape())) { + indices = indices.Reshard(HloSharding::Replicate()); + // Now the operand is partitioned in trivial slice dimensions, and the + // indices are replicated. We execute a gather on partitioned operand, + // with full number of indices, where out-of-bounds indices are clamped, + // and masked out with 0 in the result; then we use all-reduce to combine + // results. Although gather will not get faster, we avoided the need to + // replicate the operand. + HloInstruction* indices_min; + HloInstruction* indices_max; + std::tie(indices_min, indices_max) = + IndexBoundsForGatherScatterOperandPartitionedOnTrivialSliceDims( + operand, indices, partition_id_, start_index_map, + dnums.index_vector_dim(), &b_); + // Clamp the indices. + auto adjusted_indices = b_.AddInstruction(HloInstruction::CreateTernary( + indices.base_shape(), HloOpcode::kClamp, indices_min, indices.hlo(), + indices_max)); + // Adjust the indices by subtracting the offset. + adjusted_indices = b_.AddInstruction(HloInstruction::CreateBinary( + indices.base_shape(), HloOpcode::kSubtract, adjusted_indices, + indices_min)); + // Gather on adjusted indices. 
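A 1-D simulation of the gather path that follows, for an operand partitioned on a trivial (slice size 1) dimension: indices stay replicated, each partition clamps them into the block it owns, gathers from its local shard after subtracting its offset, masks out results it does not own, and the masked partial results are summed, standing in for the cross-partition all-reduce (illustrative only, not the XLA API):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t num_partitions = 2;
  const std::vector<int64_t> operand = {10, 11, 12, 13, 14, 15};
  const std::vector<int64_t> indices = {5, 0, 3};  // replicated everywhere
  const int64_t shard = static_cast<int64_t>(operand.size()) / num_partitions;

  std::vector<int64_t> result(indices.size(), 0);
  for (int64_t p = 0; p < num_partitions; ++p) {
    const int64_t min = p * shard, max = min + shard - 1;
    // Each partition holds only its own block of the operand.
    const std::vector<int64_t> local(operand.begin() + min,
                                     operand.begin() + min + shard);
    for (size_t i = 0; i < indices.size(); ++i) {
      const int64_t clamped = std::min(std::max(indices[i], min), max);
      const int64_t gathered = local[clamped - min];  // adjusted local gather
      const bool invalid = indices[i] < min || indices[i] > max;
      result[i] += invalid ? 0 : gathered;  // mask, then emulated all-reduce
    }
  }
  for (int64_t v : result) std::cout << v << " ";  // prints: 15 10 13
  std::cout << "\n";
  return 0;
}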
+ auto pgather = b_.AddInstruction(HloInstruction::CreateGather( + gather->shape(), operand.hlo(), adjusted_indices, dnums, + gather->gather_slice_sizes(), gather->indices_are_sorted())); + // Mask out invalid results. + auto filter = b_.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::ChangeElementType(indices.base_shape(), PRED), + indices.hlo(), indices_min, ComparisonDirection::kLt)); + filter = b_.AddInstruction(HloInstruction::CreateBinary( + filter->shape(), HloOpcode::kOr, filter, + b_.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::ChangeElementType(indices.base_shape(), PRED), + indices.hlo(), indices_max, ComparisonDirection::kGt)))); + if (dnums.index_vector_dim() < indices.base_shape().rank()) { + std::vector reduced_filter_dims; + for (int64 i = 0; i < filter->shape().rank(); ++i) { + if (i != dnums.index_vector_dim()) { + reduced_filter_dims.push_back(filter->shape().dimensions(i)); + } + } + filter = b_.AddInstruction(HloInstruction::CreateReduce( + ShapeUtil::MakeShape(PRED, reduced_filter_dims), filter, + CreateR0WithType(PRED, false, &b_), {dnums.index_vector_dim()}, + MakeBinaryAdd(PRED, module_))); + } + std::vector batch_dims; + for (int64 i = 0; i < pgather->shape().rank(); ++i) { + if (!absl::c_linear_search(dnums.offset_dims(), i)) { + batch_dims.push_back(i); + } + } + auto broadcast_filter = b_.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::ChangeElementType(pgather->shape(), PRED), filter, + batch_dims)); + auto filtered = b_.AddInstruction(HloInstruction::CreateTernary( + pgather->shape(), HloOpcode::kSelect, broadcast_filter, + CreateZero(pgather->shape(), &b_), pgather)); + // Combine from different partitions. + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, filtered, + MakeBinaryAdd(filtered->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + SetPartitionedHlo(hlo, [&]() { + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleGetTupleElement(HloInstruction* hlo) { + const auto& tuple = GetPartitionedHlo(hlo->operand(0)); + auto gte = b_.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::GetTupleElementShape(tuple.hlo()->shape(), hlo->tuple_index()), + tuple.hlo(), hlo->tuple_index())); + SetPartitionedHlo(hlo, [&]() { + const auto source_sharding = tuple.sharding().GetSubSharding( + tuple.base_shape(), {hlo->tuple_index()}); + gte->set_sharding(source_sharding); + PartitionedHlo source_partitioned_gte(gte, hlo->shape(), + MakePartitioningState()); + return source_partitioned_gte.Reshard(hlo->sharding()).hlo(); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleInfeed(HloInstruction* hlo) { + const Shape& shape = ShapeUtil::GetTupleElementShape(hlo->shape(), 0); + auto token = GetPartitionedHlo(hlo->operand(0)).hlo(); + if (ShapeUtil::GetLeafCount(shape) == 0) { + // TODO(b/155819021): HloSharding has issues with tuple-shaped sharding: it + // requires one element for an empty tuple, but leaf-count number of + // elements for non-empty tuple. So if it has a nested empty tuple, we + // cannot invoke GetSubSharding() since it expects a sharding for the empty + // tuple. This is a workaround for that case. 
+ SetPartitionedHlo(hlo, [&]() { + return b_.AddInstruction( + HloInstruction::CreateInfeed(shape, token, hlo->infeed_config())); + }); + return Status::OK(); + } + auto sharding = hlo->sharding().GetSubSharding(hlo->shape(), {0}); + auto shard_shape = MakePartitionedShape(shape, sharding); + if (EvenlyPartitions(shape, sharding)) { + SetPartitionedHlo(hlo, [&]() { + return b_.AddInstruction(HloInstruction::CreateInfeed( + shard_shape, token, hlo->infeed_config())); + }); + return Status::OK(); + } + + if (hlo->sharding().HasUniqueDevice()) { + return HandleSingleDevice(hlo); + } + + // Create a branch for each unique partitioned shape. + std::vector per_branch_partitioned_shapes; + std::vector conditional_branch_indices(num_partitions_); + for (int64 i = 0; i < num_partitions_; ++i) { + auto partitioned_shape = + MakeNonPaddedShapeForGivenPartition(shape, sharding, i); + int64 matching_existing_index = 0; + for (; matching_existing_index < per_branch_partitioned_shapes.size(); + ++matching_existing_index) { + if (ShapeUtil::Compatible( + partitioned_shape, + per_branch_partitioned_shapes[matching_existing_index])) { + break; + } + } + if (matching_existing_index < per_branch_partitioned_shapes.size()) { + conditional_branch_indices[i] = matching_existing_index; + } else { + conditional_branch_indices[i] = per_branch_partitioned_shapes.size(); + per_branch_partitioned_shapes.push_back(std::move(partitioned_shape)); + } + } + + HloInstruction* branch_index; + if (per_branch_partitioned_shapes.size() == num_partitions_) { + // Use partition ID as the branch index if each partition has its own + // branch. + branch_index = partition_id_; + // PartitionId's output is U32 but conditional requires S32. + if (branch_index->shape().element_type() != S32) { + branch_index = b_.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(branch_index->shape(), S32), + branch_index)); + } + } else { + // Otherwise, use a constant table to look up the branch index. + auto branch_index_table = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(conditional_branch_indices))); + branch_index = b_.AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(S32, {1}), branch_index_table, {partition_id_}, + {1})); + branch_index = b_.AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(S32, {}), branch_index)); + } + + std::vector branches(per_branch_partitioned_shapes.size()); + for (int64 i = 0; i < branches.size(); ++i) { + SpmdBuilder branch_b(absl::StrCat("infeed_branch_", i), visiting_hlo_); + auto param = branch_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, token->shape(), "infeed_token_param")); + auto infeed = branch_b.AddInstruction(HloInstruction::CreateInfeed( + per_branch_partitioned_shapes[i], param, hlo->infeed_config())); + branches[i] = module_->AddEmbeddedComputation(branch_b.Build(infeed)); + if (!ShapeUtil::Compatible(per_branch_partitioned_shapes[i], shard_shape)) { + TF_ASSIGN_OR_RETURN( + auto padded, + branches[i]->DeepCopyInstructionWithCustomCopier( + infeed, [&](HloInstruction* leaf, const ShapeIndex& leaf_index, + HloComputation* comp) { + // Index {1} corresponds to the token. 
+ if (leaf_index.empty() || leaf_index[0] != 0) { + return leaf; + } + ShapeIndexView subindex(leaf_index, 1); + if (ShapeUtil::Compatible( + ShapeUtil::GetSubshape(per_branch_partitioned_shapes[i], + subindex), + ShapeUtil::GetSubshape(shard_shape, subindex))) { + return leaf; + } + return PadToShape(leaf, + ShapeUtil::GetSubshape(shard_shape, subindex), + nullptr, comp); + })); + branches[i]->set_root_instruction(padded, + /*accept_different_shape=*/true); + } + } + SetPartitionedHlo(hlo, [&]() { + return b_.AddInstruction(HloInstruction::CreateConditional( + ShapeUtil::MakeTupleShape({shard_shape, token->shape()}), branch_index, + branches, std::vector(branches.size(), token))); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandlePad(HloInstruction* hlo) { + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + for (int64 i = 0; i < hlo->shape().rank(); ++i) { + const auto& pd = hlo->padding_config().dimensions(i); + // Right now we only support non-padded dimensions to be partitioned. + if (hlo->sharding().tile_assignment().dim(i) > 1 && + (pd.edge_padding_high() != 0 || pd.edge_padding_low() != 0 || + pd.interior_padding() != 0)) { + return DefaultAction(hlo); + } + } + auto resharded_lhs = + GetPartitionedHlo(hlo->operand(0)).Reshard(hlo->sharding()).hlo(); + auto replicated_rhs = GetPartitionedHlo(hlo->operand(1)) + .Reshard(HloSharding::Replicate()) + .hlo(); + SetPartitionedHlo(hlo, [&]() { + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + return b_.AddInstruction(hlo->CloneWithNewOperands( + shard_shape, {resharded_lhs, replicated_rhs})); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleParameter(HloInstruction* hlo) { + SetPartitionedHlo(hlo, [&]() { + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + auto new_param = b_.AddInstruction(HloInstruction::CreateParameter( + hlo->parameter_number(), shard_shape, "param")); + if (hlo->parameter_replicated_at_leaf_buffers()) { + new_param->set_parameter_replicated_at_leaf_buffers( + *hlo->parameter_replicated_at_leaf_buffers()); + } + return new_param; + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { + int64 input_count = 1; + auto per_input_sharding = hlo->sharding(); + if (hlo->shape().IsTuple()) { + input_count = hlo->shape().tuple_shapes_size(); + CHECK_GT(input_count, 0); + per_input_sharding = hlo->sharding().GetSubSharding(hlo->shape(), {0}); + } + + std::vector inputs; + std::vector inits; + for (int64 operand_id = 0; operand_id < input_count; ++operand_id) { + inits.push_back(GetPartitionedHlo(hlo->operand(operand_id + input_count)) + .Reshard(HloSharding::Replicate()) + .hlo()); + inputs.push_back(GetPartitionedHlo(hlo->operand(operand_id))); + if (operand_id > 0) { + // Make sure all operands are sharded in the same way. + inputs.back() = inputs.back().Reshard(inputs[0].sharding()); + } + if (!inputs[0].sharding().IsTileMaximal()) { + inputs.back() = inputs.back().PadWithValue(inits[operand_id]); + } + } + bool reduce_sharded_dimension = false; + if (!inputs[0].sharding().IsTileMaximal()) { + reduce_sharded_dimension = absl::c_any_of(hlo->dimensions(), [&](int64 i) { + return inputs[0].sharding().tile_assignment().dim(i) > 1; + }); + + // reduce_sharded_dimension is not supported for tuple-shaped reduces. 
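For intuition on the reduce_sharded_dimension case handled below: when a reduced dimension is also the partitioned one, every partition reduces its shard locally and the partial results are combined with a cross-partition all-reduce. A minimal sketch where a plain sum over partitions emulates that all-reduce (illustrative only):

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // A length-8 input summed over its only dimension, split across two
  // partitions.
  const std::vector<std::vector<int64_t>> shards = {{1, 2, 3, 4},
                                                    {5, 6, 7, 8}};
  int64_t all_reduced = 0;
  for (const auto& shard : shards) {
    // Local reduce on each partition...
    const int64_t local =
        std::accumulate(shard.begin(), shard.end(), int64_t{0});
    // ...followed by the cross-partition all-reduce (emulated as a plain sum).
    all_reduced += local;
  }
  std::cout << all_reduced << "\n";  // 36, identical on every partition
  return 0;
}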
+ if (reduce_sharded_dimension && input_count > 1) { + return DefaultAction(hlo); + } + + // Currently we only support reducing all or none of the sharded + // dimensions. + if (reduce_sharded_dimension) { + for (int64 i = 0; i < inputs[0].base_shape().rank(); ++i) { + if (inputs[0].sharding().tile_assignment().dim(i) > 1 && + absl::c_count(hlo->dimensions(), i) == 0) { + return DefaultAction(hlo); + } + } + } + } + + std::vector new_operand_shapes(input_count * 2); + for (int64 i = 0; i < input_count; ++i) { + new_operand_shapes[i] = inputs[i].hlo()->mutable_shape(); + new_operand_shapes[i + input_count] = inits[i]->mutable_shape(); + } + // Create the shard shape of the reduce result. + TF_ASSIGN_OR_RETURN( + auto reduce_shape, + ShapeInference::InferReduceShape(new_operand_shapes, hlo->dimensions(), + hlo->to_apply()->ComputeProgramShape())); + *reduce_shape.mutable_layout() = hlo->shape().layout(); + + std::vector input_hlos(input_count); + for (int64 i = 0; i < input_count; ++i) { + input_hlos[i] = inputs[i].hlo(); + } + auto local_reduce = b_.AddInstruction(HloInstruction::CreateReduce( + reduce_shape, input_hlos, inits, hlo->dimensions(), hlo->to_apply())); + local_reduce->set_metadata(hlo->metadata()); + + SetPartitionedHlo(hlo, [&]() { + HloInstruction* reduce; + if (reduce_sharded_dimension) { + CHECK(local_reduce->shape().IsArray()); + reduce = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, local_reduce, hlo->to_apply(), NewChannel()); + reduce->set_sharding(HloSharding::Replicate()); + } else { + reduce = local_reduce; + if (inputs[0].sharding().IsTileMaximal()) { + reduce->set_sharding(inputs[0].sharding()); + } else { + // Remove tile assignment dimensions that are reduced. + std::vector tile_dimensions; + for (int64 i = 0; i < input_hlos[0]->shape().rank(); ++i) { + if (absl::c_count(hlo->dimensions(), i) == 0) { + tile_dimensions.push_back( + inputs[0].sharding().tile_assignment().dim(i)); + } + } + Array new_tile = inputs[0].sharding().tile_assignment(); + new_tile.Reshape(tile_dimensions); + auto sharding = HloSharding::Tile(new_tile); + if (input_count > 1) { + std::vector tuple(input_count, sharding); + sharding = HloSharding::Tuple(hlo->shape(), tuple); + } + reduce->set_sharding(sharding); + } + } + + return PartitionedHlo(reduce, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleReverse(HloInstruction* hlo) { + auto reverse = Cast(hlo); + if (reverse->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + if (absl::c_all_of(reverse->dimensions(), [&](int64 d) { + return reverse->sharding().tile_assignment().dim(d) == 1; + })) { + auto operand = + GetPartitionedHlo(reverse->operand(0)).Reshard(reverse->sharding()); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction( + hlo->CloneWithNewOperands(operand.hlo()->shape(), {operand.hlo()})); + }); + return Status::OK(); + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleWhile(HloInstruction* hlo) { + const HloSharding& sharding = hlo->sharding(); + + // Shardings for the body parameter, body root, and cond parameter must be + // the same, and the condition root must be replicated so that all partitions + // follow the same control flow. 
+ hlo->while_condition()->parameter_instruction(0)->set_sharding(sharding); + hlo->while_body()->parameter_instruction(0)->set_sharding(sharding); + TF_RETURN_IF_ERROR(partitioner_ + ->PartitionComputation(hlo->while_condition(), + HloSharding::Replicate(), + next_channel_id_, logger_) + .status()); + TF_RETURN_IF_ERROR(partitioner_ + ->PartitionComputation(hlo->while_body(), sharding, + next_channel_id_, logger_) + .status()); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(HloInstruction::CreateWhile( + MakePartitionedShape(hlo->shape(), sharding), hlo->while_condition(), + hlo->while_body(), + GetPartitionedHlo(hlo->operand(0)).Reshard(sharding).hlo())); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConditional(HloInstruction* hlo) { + std::vector branch_args; + for (int64 i = 0; i < hlo->branch_count(); ++i) { + HloComputation* computation = hlo->branch_computation(i); + + // Shardings of the branch computation parameter and its argument must be + // the same. + computation->parameter_instruction(0)->set_sharding( + hlo->operand(i + 1)->sharding()); + branch_args.push_back(GetPartitionedHlo(hlo->operand(i + 1)).hlo()); + } + + // The root of the branch computations must follow the sharding of the + // conditional instruction. + for (int64 i = 0; i < hlo->branch_count(); ++i) { + HloComputation* computation = hlo->branch_computation(i); + TF_RETURN_IF_ERROR(partitioner_ + ->PartitionComputation(computation, hlo->sharding(), + next_channel_id_, logger_) + .status()); + } + + // We replicate the predicate of the conditional (the first operand) so that + // all partitions follow the same control flow. + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction(HloInstruction::CreateConditional( + MakePartitionedShape(hlo->shape(), hlo->sharding()), + GetPartitionedHlo(hlo->operand(0)) + .Reshard(HloSharding::Replicate()) + .hlo(), + hlo->called_computations(), branch_args)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleOutfeed(HloInstruction* hlo) { + TF_RET_CHECK(hlo->sharding().HasUniqueDevice()); + return HandleSingleDevice(hlo); +} + +Status SpmdPartitioningVisitor::HandleRng(HloInstruction* hlo) { + if (hlo->sharding().HasUniqueDevice()) { + return HandleSingleDevice(hlo); + } + + if (hlo->sharding().IsReplicated()) { + SetPartitionedHlo(hlo, [&] { + // Run on a single device (0) and distribute the data to all other cores. + std::vector new_operands; + for (int64 i = 0; i < hlo->operand_count(); ++i) { + new_operands.push_back(GetPartitionedHlo(hlo->operand(i)) + .Reshard(HloSharding::AssignDevice(0)) + .hlo()); + } + auto clone = b_.AddInstruction( + hlo->CloneWithNewOperands(hlo->shape(), new_operands)); + clone->set_sharding(HloSharding::AssignDevice(0)); + return PartitionedHlo(clone, hlo->shape(), MakePartitioningState()) + .Reshard(HloSharding::Replicate()) + .hlo(); + }); + return Status::OK(); + } + + TF_RET_CHECK(!hlo->sharding().IsTileMaximal()); + SetPartitionedHlo(hlo, [&] { + // Replicate the operands and run partitioned Rng on all devices. 
+ std::vector new_operands; + for (int64 i = 0; i < hlo->operand_count(); ++i) { + new_operands.push_back(GetPartitionedHlo(hlo->operand(i)) + .Reshard(HloSharding::Replicate()) + .hlo()); + } + return b_.AddInstruction(HloInstruction::CreateRng( + MakePartitionedShape(hlo->shape(), hlo->sharding()), + hlo->random_distribution(), new_operands)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleReduceWindow(HloInstruction* hlo) { + auto& operand = GetPartitionedHlo(hlo->operand(0)); + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + + // Replicate init + auto replicated_init = GetPartitionedHlo(hlo->mutable_operand(1)) + .Reshard(HloSharding::Replicate()); + auto resharded_operand_and_window = operand.ReshardAsWindowedInput( + hlo->window(), hlo->sharding(), replicated_init.hlo()); + if (!resharded_operand_and_window.has_value()) { + return DefaultAction(hlo); + } + + TF_ASSIGN_OR_RETURN(Shape sharded_rw_shape, + ShapeInference::InferReduceWindowShape( + resharded_operand_and_window->sharded_input->shape(), + replicated_init.hlo()->shape(), + resharded_operand_and_window->shard_window, + hlo->to_apply()->ComputeProgramShape())); + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + *sharded_rw_shape.mutable_layout() = shard_shape.layout(); + SetPartitionedHlo(hlo, [&]() { + auto sharded_rw = b_.AddInstruction(HloInstruction::CreateReduceWindow( + sharded_rw_shape, resharded_operand_and_window->sharded_input, + replicated_init.hlo(), resharded_operand_and_window->shard_window, + hlo->to_apply())); + if (!resharded_operand_and_window->dynamic_slice_index_on_output + .has_value()) { + CHECK(ShapeUtil::Compatible(shard_shape, sharded_rw->shape())); + return sharded_rw; + } + return b_.AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, sharded_rw, + *resharded_operand_and_window->dynamic_slice_index_on_output, + shard_shape.dimensions())); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleSelectAndScatter(HloInstruction* hlo) { + if (hlo->sharding().IsTileMaximal()) { + return DefaultAction(hlo); + } + auto operand = GetPartitionedHlo(hlo->operand(0)); + auto source = GetPartitionedHlo(hlo->mutable_operand(1)); + if (hlo->sharding() != operand.sharding()) { + operand = operand.Reshard(hlo->sharding()); + } + if (hlo->sharding() != source.sharding()) { + source = source.Reshard(hlo->sharding()); + } + + // For F32 and BF16 types, we can use NaN padding to workaround the issue with + // low/high padding, since comparison will return false with NaN input. 
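As an aside, a minimal standalone sketch (hypothetical values, not XLA code) of why the pad value chosen just below is safe: for a kGe/kGt "max-style" select, padding the halo region with -infinity guarantees a padded element never wins against real data, so the scatter never lands on padding.

  #include <cstdio>
  #include <limits>

  // Illustrative "greater-or-equal" select, mirroring the kGe case below.
  static float SelectGe(float a, float b) { return a >= b ? a : b; }

  int main() {
    const float kPad = -std::numeric_limits<float>::infinity();
    std::printf("%f\n", SelectGe(kPad, 3.0f));   // 3.0: padding never selected
    std::printf("%f\n", SelectGe(-7.0f, kPad));  // -7.0: real data still wins
  }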
+  if (hlo->shape().element_type() != F32 &&
+      hlo->shape().element_type() != BF16) {
+    return DefaultAction(hlo);
+  }
+
+  auto select = hlo->called_computations()[0];
+  auto select_root = select->root_instruction();
+  if (select_root->opcode() != HloOpcode::kCompare ||
+      select_root->operand(0)->opcode() != HloOpcode::kParameter ||
+      select_root->operand(1)->opcode() != HloOpcode::kParameter ||
+      select_root->operand(0)->parameter_number() +
+              select_root->operand(1)->parameter_number() !=
+          1) {
+    return DefaultAction(hlo);
+  }
+
+  float float_pad_value;
+  if (select_root->comparison_direction() == ComparisonDirection::kGe ||
+      select_root->comparison_direction() == ComparisonDirection::kGt) {
+    if (select_root->operand(0)->parameter_number() == 0) {
+      float_pad_value = -std::numeric_limits<float>::infinity();
+    } else {
+      float_pad_value = std::numeric_limits<float>::infinity();
+    }
+  } else if (select_root->comparison_direction() == ComparisonDirection::kLe ||
+             select_root->comparison_direction() == ComparisonDirection::kLt) {
+    if (select_root->operand(0)->parameter_number() == 0) {
+      float_pad_value = std::numeric_limits<float>::infinity();
+    } else {
+      float_pad_value = -std::numeric_limits<float>::infinity();
+    }
+  } else {
+    return DefaultAction(hlo);
+  }
+
+  auto pad_value = b_.AddInstruction(HloInstruction::CreateConstant(
+      hlo->shape().element_type() == BF16
+          ? LiteralUtil::CreateR0<bfloat16>(
+                static_cast<bfloat16>(float_pad_value))
+          : LiteralUtil::CreateR0<float>(float_pad_value)));
+
+  // Replicate init
+  auto replicated_init = GetPartitionedHlo(hlo->mutable_operand(2))
+                             .Reshard(HloSharding::Replicate());
+
+  auto partition_ordinals =
+      MakeTiledPartitionOrdinals(hlo->sharding(), partition_id_, &b_);
+
+  // The first window for each dimension that overlaps with the shard area.
+  std::vector<MultiplyAddDivideOffsetCalculation> first_window(
+      hlo->shape().rank());
+  // The first window for each dimension that goes beyond the shard area.
+  std::vector<MultiplyAddDivideOffsetCalculation> limit_window(
+      hlo->shape().rank());
+  std::vector<OffsetCalculation> data_left_halo_sizes(hlo->shape().rank());
+  std::vector<OffsetCalculation> data_right_halo_sizes(hlo->shape().rank());
+  std::vector<OffsetCalculation> source_left_halo_sizes(hlo->shape().rank());
+  std::vector<OffsetCalculation> source_right_halo_sizes(hlo->shape().rank());
+  auto unpadded_data_shard_shape =
+      MakePartitionedShape(hlo->shape(), hlo->sharding());
+  auto unpadded_source_shard_shape =
+      MakePartitionedShape(hlo->operand(1)->shape(), hlo->sharding());
+  auto source_shard_hlo = source.hlo();
+  auto data_shard_hlo = operand.hlo();
+  for (int64 i = 0; i < hlo->shape().rank(); ++i) {
+    int64 shard_count = hlo->sharding().tile_assignment().dim(i);
+    if (shard_count == 1) {
+      continue;
+    }
+    // If stride > window_size, there will be gaps between windows. These gaps
+    // will also exist in the output, so we keep them during halo exchange.
+    //
+    // TODO(yuanzx): This could introduce overhead if partitions start at
+    // different offsets in a gap.
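A rough standalone illustration (hypothetical sizes) of the gap case handled next: with stride 3 and window size 2, one element per period falls between windows; widening the per-shard window size to the stride keeps those gap elements in the windows so they survive the halo exchange and land in the output unchanged.

  #include <cstdio>

  int main() {
    const int stride = 3, window_size = 2, num_windows = 4;
    for (int k = 0; k < num_windows; ++k) {
      // Original window k covers [k*stride, k*stride + window_size), leaving a
      // gap at k*stride + 2; with size widened to stride, windows tile the axis.
      std::printf("window %d: [%d, %d) -> widened to [%d, %d)\n", k, k * stride,
                  k * stride + window_size, k * stride, (k + 1) * stride);
    }
  }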
+ auto wd = hlo->window().dimensions(i); + if (wd.stride() > wd.size()) { + wd.set_size(wd.stride()); + } + // shard_size * i < stride * k - pad_low + window_size => + // k > (shard_size * i + pad_low - window_size) / stride => + // first_k == (shard_size * i + pad_low - window_size + stride) / stride + first_window[i] = MultiplyAddDivideOffsetCalculation( + unpadded_data_shard_shape.dimensions(i), + wd.padding_low() - wd.size() + wd.stride(), wd.stride()); + // shard_size * (i + 1) <= stride * k - pad_low => + // k >= (shard_size * i + shard_size + pad_low) / stride => + // limit_k == (shard_size * i + shard_size + pad_low + stride - 1) / + // stride + limit_window[i] = MultiplyAddDivideOffsetCalculation( + unpadded_data_shard_shape.dimensions(i), + unpadded_data_shard_shape.dimensions(i) + wd.padding_low() + + wd.stride() - 1, + wd.stride()); + source_left_halo_sizes[i] = + MultiplyAddDivideOffsetCalculation( + unpadded_source_shard_shape.dimensions(i), 0, 1) - + first_window[i]; + source_right_halo_sizes[i] = + limit_window[i] - MultiplyAddDivideOffsetCalculation( + unpadded_source_shard_shape.dimensions(i), + unpadded_source_shard_shape.dimensions(i), 1); + data_left_halo_sizes[i] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + unpadded_data_shard_shape.dimensions(i), wd.padding_low(), 1)) - + OffsetCalculation( + HloOpcode::kMultiply, first_window[i], + MultiplyAddDivideOffsetCalculation(0, wd.stride(), 1)); + data_right_halo_sizes[i] = + OffsetCalculation( + HloOpcode::kMultiply, limit_window[i], + MultiplyAddDivideOffsetCalculation(0, wd.stride(), 1)) - + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + unpadded_data_shard_shape.dimensions(i), + unpadded_data_shard_shape.dimensions(i) + wd.stride() + + wd.padding_low() - wd.size(), + 1)); + + int64 max_windows = + (limit_window[i] - first_window[i]).MaxInRange(0, shard_count); + auto first_window_hlo = + first_window[i].Calculate(partition_ordinals[i], &b_); + // Padding on the source is filled with the init value so they do not change + // the data on overlapping windows. 
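A worked instance of the first_k/limit_k derivations above, with hypothetical sizes; it only assumes that MultiplyAddDivideOffsetCalculation(m, a, d) evaluates to (m * ordinal + a) / d, which is how the assignments above encode those formulas.

  #include <cstdio>

  // (m * i + a) / d, mirroring how the derivations above are encoded.
  static int MultiplyAddDivide(int m, int a, int d, int i) {
    return (m * i + a) / d;
  }

  int main() {
    const int shard_size = 8, window_size = 3, stride = 2, pad_low = 1;
    for (int i = 0; i < 2; ++i) {
      int first_k = MultiplyAddDivide(shard_size,
                                      pad_low - window_size + stride, stride, i);
      int limit_k = MultiplyAddDivide(shard_size,
                                      shard_size + pad_low + stride - 1, stride, i);
      // Shard 0 owns windows [0, 5) and shard 1 owns [4, 9); the overlap is the
      // halo that the source/data exchanges below provide.
      std::printf("shard %d: windows [%d, %d)\n", i, first_k, limit_k);
    }
  }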
+ auto resharded_source = ExchangeHaloAndGetValidData( + source_shard_hlo, source.base_shape(), source_left_halo_sizes[i], + source_right_halo_sizes[i], 0, + limit_window[i].Calculate(shard_count - 1), max_windows, i, + hlo->sharding(), first_window_hlo, replicated_init.hlo(), + partition_ordinals[i], collective_ops_creator_, next_channel_id_, &b_); + if (!resharded_source) { + return DefaultAction(hlo); + } + source_shard_hlo = *resharded_source; + + auto offset_start_in_data = + MultiplyAddDivideOffsetCalculation(wd.stride(), 0, 1) + .Calculate(first_window_hlo, &b_); + int64 padded_data_size = + (limit_window[i].Calculate(shard_count - 1) - 1) * wd.stride() + + wd.size(); + int64 data_shard_size = (max_windows - 1) * wd.stride() + wd.size(); + auto resharded_data = ExchangeHaloAndGetValidData( + data_shard_hlo, operand.base_shape(), data_left_halo_sizes[i], + data_right_halo_sizes[i], wd.padding_low(), padded_data_size, + data_shard_size, i, hlo->sharding(), offset_start_in_data, pad_value, + partition_ordinals[i], collective_ops_creator_, next_channel_id_, &b_); + if (!resharded_data) { + return DefaultAction(hlo); + } + data_shard_hlo = *resharded_data; + } + + Window window_on_shard = hlo->window(); + for (int64 i = 0; i < window_on_shard.dimensions_size(); ++i) { + int64 shard_count = hlo->sharding().tile_assignment().dim(i); + if (shard_count == 1) { + continue; + } + auto reshard_wd = window_on_shard.mutable_dimensions(i); + // The shards are already explicitly padded. + reshard_wd->set_padding_low(0); + reshard_wd->set_padding_high(0); + } + + auto sharded_select_and_scatter = + b_.AddInstruction(HloInstruction::CreateSelectAndScatter( + data_shard_hlo->shape(), data_shard_hlo, select, window_on_shard, + source_shard_hlo, replicated_init.hlo(), + hlo->called_computations()[1])); + SetPartitionedHlo(hlo, [&]() { + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + if (ShapeUtil::Compatible(sharded_select_and_scatter->shape(), + shard_shape)) { + return sharded_select_and_scatter; + } + auto zero = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + std::vector slice_offsets(shard_shape.rank(), zero); + for (int64 i = 0; i < window_on_shard.dimensions_size(); ++i) { + if (hlo->sharding().tile_assignment().dim(i) == 1) { + continue; + } + int64 pad_low = hlo->window().dimensions(i).padding_low(); + auto left_halo_size = + data_left_halo_sizes[i].Calculate(partition_ordinals[i], &b_); + if (data_left_halo_sizes[i].Calculate(0) == pad_low) { + slice_offsets[i] = left_halo_size; + } else { + auto is_shard0 = b_.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), zero, partition_ordinals[i], + ComparisonDirection::kEq)); + auto pad_low_hlo = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(pad_low))); + slice_offsets[i] = b_.AddInstruction(HloInstruction::CreateTernary( + zero->shape(), HloOpcode::kSelect, is_shard0, pad_low_hlo, + left_halo_size)); + } + } + return b_.AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, sharded_select_and_scatter, slice_offsets, + shard_shape.dimensions())); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleTuple(HloInstruction* hlo) { + std::vector new_operands; + for (int64 i = 0; i < hlo->operand_count(); ++i) { + new_operands.push_back( + GetPartitionedHlo(hlo->operand(i)) + .Reshard(hlo->sharding().GetSubSharding(hlo->shape(), {i})) + .hlo()); + } + SetPartitionedHlo(hlo, [&]() { + return 
b_.AddInstruction(HloInstruction::CreateTuple(new_operands)); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( + HloInstruction* hlo) { + TF_RET_CHECK(hlo->opcode() == HloOpcode::kConvolution); + + auto lhs = GetPartitionedHlo(hlo->operand(0)); + auto rhs = GetPartitionedHlo(hlo->operand(1)); + TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && + !rhs.sharding().IsTileMaximal()); + + const auto& dnums = hlo->convolution_dimension_numbers(); + + // Check if the operand shardings are aligned. Also we currently don't + // support partitioning non-spatial dimensions. + std::vector rhs_to_lhs_indices(hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(hlo->shape().rank()); + for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { + lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; + } + auto aligned_rhs_sharding = + hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); + auto aligned_lhs_sharding = + hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + + auto unsupported_sharding = [&](const HloSharding& lhs_sharding, + const HloSharding& rhs_sharding) { + return lhs_sharding.tile_assignment().dim(dnums.input_batch_dimension()) != + 1 || + rhs_sharding.tile_assignment().dim( + dnums.kernel_output_feature_dimension()) != 1; + }; + + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + if (ShapeUtil::ByteSizeOf(lhs.base_shape()) < + ShapeUtil::ByteSizeOf(rhs.base_shape())) { + if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { + return DefaultAction(hlo); + } + lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } else { + if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { + return DefaultAction(hlo); + } + lhs = lhs.PadWithValue(zero); + rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero); + } + + // Reshard LHS by exchanging halo such that each shard computes the partial + // sum of the full shape result, and add AllReduce. + // + // The size of halo on each dimension can be calculated from the projection + // onto the LHS that each RHS shard i needs to read. RHS and LHS below refers + // to the shard size of RHS and LHS, WC is the number of windows, and D is the + // window dilation. 
+ // + // * offset(i): RHS * D * i - low_padding + // * limit(i): {(RHS - 1) * D + 1} * (i + 1) + (WC - 1) * stride - low_padding + // + // Since shard i has LHS of range [i * LHS, (i + 1) * LHS) + // * left-halo: i * LHS - offset(i) + // = (LHS - RHS) * i + low_padding + // * right-halo: limit(i) - (i + 1) * LHS + // = [{(RHS - 1) * D + 1} - LHS] * (i + 1) + (WC - 1) * stride - low_padding + + Window window = hlo->window(); + std::vector shard_counts(dnums.input_spatial_dimensions_size()); + std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); + std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); + int64 shard_count = lhs.sharding().tile_assignment().dim(lhs_dimension); + auto wd = window.dimensions(i); + if (wd.base_dilation() != 1 || wd.window_reversal()) { + return DefaultAction(hlo); + } + + int64 lhs_shard_size = + CeilOfRatio(lhs.base_shape().dimensions(lhs_dimension), shard_count); + int64 rhs_shard_size = + CeilOfRatio(rhs.base_shape().dimensions(rhs_dimension), shard_count); + shard_counts[i] = shard_count; + lhs_shard_sizes[i] = lhs_shard_size; + rhs_shard_sizes[i] = rhs_shard_size; + } + + std::vector left_halo_size_functions(hlo->shape().rank()); + std::vector right_halo_size_functions(hlo->shape().rank()); + Window new_window = window; + + auto partition_ordinals = + MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); + HloInstruction* lhs_with_halo = lhs.hlo(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 lhs_shard_size = lhs_shard_sizes[i]; + int64 rhs_shard_size = rhs_shard_sizes[i]; + + if (shard_counts[i] == 1) { + continue; + } + + // Calculate the left and right halo sizes as described in the comments + // above. + auto wd = window.dimensions(i); + int64 padding_low = wd.padding_low(); + int64 padding_high = wd.padding_high(); + int64 base = lhs.base_shape().dimensions(lhs_dimension); + int64 window_count = 1 + (padding_low + padding_high + base - + (1 + (wd.size() - 1) * wd.window_dilation())) / + wd.stride(); + int64 rhs_shard_size_dilated = + (rhs_shard_size - 1) * wd.window_dilation() + 1; + + left_halo_size_functions[lhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + lhs_shard_size - rhs_shard_size * wd.window_dilation(), padding_low, + 1)); + right_halo_size_functions[lhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_size_dilated - lhs_shard_size, + rhs_shard_size_dilated - lhs_shard_size + + wd.stride() * (window_count - 1) - padding_low, + 1)); + + // Exchange halo and concatenate. + int64 dim = dnums.input_spatial_dimensions(i); + int64 explicit_left_padding_on_full_shape = padding_low; + int64 shard_size_with_halo = + wd.stride() * (window_count - 1) + rhs_shard_size_dilated; + + new_window.mutable_dimensions(i)->set_padding_low(0); + new_window.mutable_dimensions(i)->set_padding_high(0); + new_window.mutable_dimensions(i)->set_size(rhs_shard_size); + + // offset_on_padded_shape and padded_full_shape_size are needed only if + // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). + // Since the default value for both the collective-permute is zero and + // also we call PadWithValue() on both operands at the beginning, we + // don't need to mask here. 
+ // + // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls + // if it's always safe. + auto offset_on_padded_shape = + OffsetCalculation(MultiplyAddDivideOffsetCalculation()); + int64 padded_full_shape_size = 0; + auto concat = ExchangeHaloAndGetValidData( + lhs_with_halo, lhs.base_shape(), left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding_on_full_shape, + padded_full_shape_size, shard_size_with_halo, dim, lhs.sharding(), + offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), zero, + partition_ordinals[dim], collective_ops_creator_, next_channel_id_, &b_, + /*mask_invalid_region=*/false); + if (!concat) { + return DefaultAction(hlo); + } + lhs_with_halo = *concat; + } + + SetPartitionedHlo(hlo, [&]() { + auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( + hlo->shape(), lhs_with_halo, rhs.hlo(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, + hlo->convolution_dimension_numbers(), hlo->precision_config())); + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { + auto lhs = GetPartitionedHlo(hlo->operand(0)); + auto rhs = GetPartitionedHlo(hlo->operand(1)); + const HloSharding& sharding = hlo->sharding(); + const auto& dnums = hlo->convolution_dimension_numbers(); + std::vector rhs_to_lhs_indices(hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(hlo->shape().rank()); + for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { + lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; + } + auto aligned_rhs_sharding = + hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); + auto aligned_lhs_sharding = + hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + + // Handling cases where both operands' shardings are aligned. We check that + // the LHS batch dimension is not partitioned because it is mapped to the + // output feature dimension in aligned_rhs_sharding, which are not the same + // dimension. + if (!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()) { + if (options_.conv_halo_exchange_always_on_lhs) { + return HandleConvolutionTiledLhsAndRhs(hlo); + } else { + // Reshard RHS so that each shard computes the partial sum of the full + // shape result, and add AllReduce. See HandleConvolutionTiledLhsAndRhs() + // that reshards LHS. + // + // The size of halo on each dimension can be calculated from the + // projection onto the RHS that shard i needs to read. RHS and LHS below + // refers to the shard size of RHS and LHS, WC is the number of windows, + // and D is the window dilation. 
+      //
+      // * offset(i): LHS * i + low_padding - (WC - 1) * stride
+      // * limit(i): LHS * (i + 1) + low_padding
+      //
+      // Since shard i has RHS of range [i * RHS * D, (i + 1) * RHS * D)
+      // * left-halo: i * RHS - offset(i)
+      //   = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding
+      // * right-halo: limit(i) - (i + 1) * RHS
+      //   = (i + 1) * (LHS - RHS * D) + low_padding
+
+      auto unsupported_sharding = [&](const HloSharding& lhs_sharding,
+                                      const HloSharding& rhs_sharding) {
+        // We currently don't support partitioning input batch or output feature
+        // dimensions.
+        return lhs_sharding.tile_assignment().dim(
+                   dnums.input_batch_dimension()) != 1 ||
+               rhs_sharding.tile_assignment().dim(
+                   dnums.kernel_output_feature_dimension()) != 1;
+      };
+      auto zero = b_.AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::Zero(hlo->shape().element_type())));
+      if (ShapeUtil::ByteSizeOf(lhs.base_shape()) <
+          ShapeUtil::ByteSizeOf(rhs.base_shape())) {
+        if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) {
+          return DefaultAction(hlo);
+        }
+        lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero);
+        rhs = rhs.PadWithValue(zero);
+      } else {
+        if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) {
+          return DefaultAction(hlo);
+        }
+        lhs = lhs.PadWithValue(zero);
+        rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero);
+      }
+
+      Window window = hlo->window();
+      std::vector<int64> shard_counts(dnums.input_spatial_dimensions_size());
+      std::vector<int64> lhs_shard_sizes(dnums.input_spatial_dimensions_size());
+      std::vector<int64> rhs_shard_sizes(dnums.input_spatial_dimensions_size());
+      for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) {
+        int64 lhs_dimension = dnums.input_spatial_dimensions(i);
+        int64 rhs_dimension = dnums.kernel_spatial_dimensions(i);
+        int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension);
+        auto wd = window.dimensions(i);
+        if (wd.base_dilation() != 1 || wd.window_reversal()) {
+          return DefaultAction(hlo);
+        }
+
+        int64 lhs_shard_size = CeilOfRatio(
+            lhs.base_shape().dimensions(lhs_dimension), shard_count);
+        int64 rhs_shard_size = CeilOfRatio(
+            rhs.base_shape().dimensions(rhs_dimension), shard_count);
+        shard_counts[i] = shard_count;
+        lhs_shard_sizes[i] = lhs_shard_size;
+        rhs_shard_sizes[i] = rhs_shard_size;
+      }
+
+      std::vector<OffsetCalculation> left_halo_size_functions(
+          hlo->shape().rank());
+      std::vector<OffsetCalculation> right_halo_size_functions(
+          hlo->shape().rank());
+      Window new_window = window;
+
+      // Data structures needed for Pad and DynamicSlice on LHS if needed.
+      bool need_dynamic_slice_lhs = false;
+      auto partition_ordinals =
+          MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_);
+      std::vector<int64> zero_padding(hlo->shape().rank());
+      PaddingConfig pad_config =
+          window_util::MakeSymmetricPadding(zero_padding);
+      auto zero_s32 = b_.AddInstruction(
+          HloInstruction::CreateConstant(LiteralUtil::Zero(S32)));
+      std::vector<HloInstruction*> dynamic_slice_start_indices(
+          hlo->shape().rank(), zero_s32);
+      Shape dynamic_slice_shape = lhs.hlo()->shape();
+      Shape pad_shape = lhs.hlo()->shape();
+
+      for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) {
+        int64 lhs_dimension = dnums.input_spatial_dimensions(i);
+        int64 rhs_dimension = dnums.kernel_spatial_dimensions(i);
+        int64 lhs_shard_size = lhs_shard_sizes[i];
+        int64 rhs_shard_size = rhs_shard_sizes[i];
+
+        if (shard_counts[i] == 1) {
+          continue;
+        }
+
+        // Calculate the left and right halo sizes as described in the comments
+        // above. It calculates the halo sizes with dilation, so we apply
+        // CeilOfRatio({left,right}_halo_size, window_dilation).
+        auto wd = window.dimensions(i);
+        int64 padding_low = wd.padding_low();
+        int64 padding_high = wd.padding_high();
+        int64 base = lhs.base_shape().dimensions(lhs_dimension);
+        int64 window_count =
+            1 + (padding_low + padding_high + base -
+                 (1 + (wd.size() - 1) * wd.window_dilation())) /
+                    wd.stride();
+        left_halo_size_functions[rhs_dimension] =
+            OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+                rhs_shard_size * wd.window_dilation() - lhs_shard_size,
+                (window_count - 1) * wd.stride() - padding_low +
+                    wd.window_dilation() - 1,
+                wd.window_dilation()));
+        right_halo_size_functions[rhs_dimension] =
+            OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+                lhs_shard_size - rhs_shard_size * wd.window_dilation(),
+                lhs_shard_size - rhs_shard_size * wd.window_dilation() +
+                    padding_low + wd.window_dilation() - 1,
+                wd.window_dilation()));
+
+        // New RHS window size includes the maximum of both left and right
+        // halos.
+        int64 halo_size = left_halo_size_functions[rhs_dimension].MaxInRange(
+                              1, shard_counts[i]) +
+                          right_halo_size_functions[rhs_dimension].MaxInRange(
+                              0, shard_counts[i] - 1);
+        int64 new_window_size =
+            rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size;
+
+        // The amount of new low padding could be dynamic (e.g., window_dilation
+        // != 1), which requires pad (to the maximum) and dynamic slice on LHS.
+        //
+        // If we consider the first window, the offset of the dilated RHS that
+        // aligns with the first valid LHS element for shard i is 'padding_low +
+        // LHS * i'. When the left halo is added to RHS, the offset of the first
+        // RHS element is (RHS * i - left_halo) * window_dilation. The
+        // difference between the two values is the amount of padding_low we
+        // need on LHS.
+        auto new_padding_low_function =
+            OffsetCalculation(
+                HloOpcode::kMultiply, left_halo_size_functions[rhs_dimension],
+                OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+                    0, wd.window_dilation(), 1))) -
+            OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+                rhs_shard_size * wd.window_dilation() - lhs_shard_size,
+                -padding_low, 1));
+
+        int64 new_padding_low_max =
+            new_padding_low_function.MaxInRange(0, shard_counts[i]);
+        int64 new_padding_low = new_padding_low_max;
+        int64 new_padding_high = window_count * wd.stride() +
+                                 (new_window_size - 1) * wd.window_dilation() -
+                                 new_padding_low - lhs_shard_size;
+
+        // We do pad/dynamic-slice only when the padding is dynamic.
+        if (!new_padding_low_function.IsConstant()) {
+          need_dynamic_slice_lhs = true;
+          new_padding_low = 0;
+          pad_config.mutable_dimensions(lhs_dimension)
+              ->set_edge_padding_low(new_padding_low_max);
+          pad_config.mutable_dimensions(lhs_dimension)
+              ->set_edge_padding_high(new_padding_low_max);
+          pad_shape.set_dimensions(lhs_dimension,
+                                   lhs_shard_size + 2 * new_padding_low_max);
+          dynamic_slice_start_indices[lhs_dimension] =
+              (OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+                   0, new_padding_low_max, 1)) -
+               new_padding_low_function)
+                  .Calculate(partition_ordinals[lhs_dimension], &b_);
+          dynamic_slice_shape.set_dimensions(
+              lhs_dimension, lhs_shard_size + new_padding_low_max);
+        }
+
+        // Since the convolution RHS operand size increased with halos, adjust
+        // the window config accordingly.
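A small worked example (hypothetical numbers) of the CeilOfRatio adjustment mentioned in the comment above: a halo measured in dilated coordinates must be converted back to a count of kernel elements.

  #include <cstdio>

  static int CeilOfRatio(int a, int b) { return (a + b - 1) / b; }

  int main() {
    // With window_dilation 2, consecutive kernel elements are 2 apart in the
    // dilated space, so a 5-element halo in dilated coordinates only needs
    // ceil(5 / 2) = 3 extra kernel elements.
    const int halo_in_dilated_space = 5, window_dilation = 2;
    std::printf("%d\n", CeilOfRatio(halo_in_dilated_space, window_dilation));
  }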
+ new_window.mutable_dimensions(i)->set_padding_low(new_padding_low); + new_window.mutable_dimensions(i)->set_padding_high(new_padding_high); + new_window.mutable_dimensions(i)->set_size( + rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size); + } + + HloInstruction* conv_lhs = lhs.hlo(); + if (need_dynamic_slice_lhs) { + auto pad = b_.AddInstruction( + HloInstruction::CreatePad(pad_shape, lhs.hlo(), zero, pad_config)); + conv_lhs = b_.AddInstruction(HloInstruction::CreateDynamicSlice( + dynamic_slice_shape, pad, dynamic_slice_start_indices, + dynamic_slice_shape.dimensions())); + } + + // Exchange halo and concatenate. + HloInstruction* rhs_with_halo = rhs.hlo(); + for (int i = 0; i < dnums.kernel_spatial_dimensions_size(); ++i) { + int64 dim = dnums.kernel_spatial_dimensions(i); + int64 explicit_left_padding_on_full_shape = + left_halo_size_functions[dim].Calculate(0); + int64 shard_size_with_halo = new_window.dimensions(i).size(); + + // offset_on_padded_shape and padded_full_shape_size are needed only if + // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). + // Since the default value for both the collective-permute is zero and + // also we call PadWithValue() on both operands at the beginning, we + // don't need to mask here. + // + // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls + // if it's always safe. + auto offset_on_padded_shape = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_sizes[i], explicit_left_padding_on_full_shape, 1)) - + left_halo_size_functions[dim]; + int64 padded_full_shape_size = + offset_on_padded_shape.Calculate(shard_counts[i] - 1) + + new_window.dimensions(i).size(); + auto concat = ExchangeHaloAndGetValidData( + rhs_with_halo, rhs.base_shape(), left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding_on_full_shape, + padded_full_shape_size, shard_size_with_halo, dim, rhs.sharding(), + offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), + zero, partition_ordinals[dim], collective_ops_creator_, + next_channel_id_, &b_, /*mask_invalid_region=*/false); + if (!concat) { + return DefaultAction(hlo); + } + rhs_with_halo = *concat; + } + + SetPartitionedHlo(hlo, [&]() { + auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( + hlo->shape(), conv_lhs, rhs_with_halo, hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums, + hlo->precision_config())); + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + } + + if (!sharding.IsTileMaximal()) { + // We don't currently support sharding on output feature dimension. + if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { + return DefaultAction(hlo); + } + + // Check if the operand and the output sharding are aligned. 
+ std::vector input_to_output_indices(hlo->shape().rank()); + input_to_output_indices[dnums.input_batch_dimension()] = + dnums.output_batch_dimension(); + input_to_output_indices[dnums.input_feature_dimension()] = + dnums.output_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + input_to_output_indices[dnums.input_spatial_dimensions(i)] = + dnums.output_spatial_dimensions(i); + } + auto target_operand_sharding = + hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); + lhs = lhs.Reshard(target_operand_sharding); + + // Replicate the RHS. + rhs = rhs.Reshard(HloSharding::Replicate()); + + // Convolution window config does not include batch and feature dimensions, + // whereas ReshardAsWindowedInput() expects the same number of window + // dimensions as the rank of the operand. So add two more trivial + // dimensions. + std::vector ones(hlo->shape().rank(), 1); + auto operand_window = window_util::MakeWindow(ones); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = + hlo->window().dimensions(i); + } + + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + auto resharded_operand_and_window = lhs.ReshardAsWindowedInput( + operand_window, target_operand_sharding, zero); + if (!resharded_operand_and_window.has_value()) { + return DefaultAction(hlo); + } + Window new_window; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *new_window.add_dimensions() = + resharded_operand_and_window->shard_window.dimensions( + dnums.input_spatial_dimensions(i)); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + resharded_operand_and_window->sharded_input->shape(), + rhs.hlo()->shape(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums)); + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + *sharded_conv_shape.mutable_layout() = shard_shape.layout(); + SetPartitionedHlo(hlo, [&]() { + auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, resharded_operand_and_window->sharded_input, + rhs.hlo(), hlo->feature_group_count(), hlo->batch_group_count(), + new_window, dnums, hlo->precision_config())); + if (!resharded_operand_and_window->dynamic_slice_index_on_output + .has_value()) { + CHECK(ShapeUtil::Compatible(shard_shape, sharded_conv->shape())); + return sharded_conv; + } + return b_.AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, sharded_conv, + *resharded_operand_and_window->dynamic_slice_index_on_output, + shard_shape.dimensions())); + }); + return Status::OK(); + } + return DefaultAction(hlo); +} + +Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { + DotGeneralDimsMapping mapping; + const auto& dnums = hlo->dot_dimension_numbers(); + int64 next_output_dim = 0; + for (int64 i = 0; i < dnums.lhs_batch_dimensions_size(); ++i) { + mapping.batch_dims.emplace_back(); + mapping.batch_dims.back().lhs = dnums.lhs_batch_dimensions(i); + mapping.batch_dims.back().rhs = dnums.rhs_batch_dimensions(i); + mapping.batch_dims.back().output = next_output_dim++; + } + for (int64 i = 0; i < dnums.lhs_contracting_dimensions_size(); ++i) { + mapping.contracting_dims.emplace_back(); + mapping.contracting_dims.back().lhs = dnums.lhs_contracting_dimensions(i); + mapping.contracting_dims.back().rhs = dnums.rhs_contracting_dimensions(i); + 
mapping.contracting_dims.back().output = -1; + } + for (int64 i = 0; i < hlo->operand(0)->shape().rank(); ++i) { + if (absl::c_linear_search(dnums.lhs_batch_dimensions(), i) || + absl::c_linear_search(dnums.lhs_contracting_dimensions(), i)) { + continue; + } + mapping.lhs_non_contracting_dims.emplace_back(); + mapping.lhs_non_contracting_dims.back().lhs = i; + mapping.lhs_non_contracting_dims.back().rhs = -1; + mapping.lhs_non_contracting_dims.back().output = next_output_dim++; + } + for (int64 i = 0; i < hlo->operand(1)->shape().rank(); ++i) { + if (absl::c_linear_search(dnums.rhs_batch_dimensions(), i) || + absl::c_linear_search(dnums.rhs_contracting_dimensions(), i)) { + continue; + } + mapping.rhs_non_contracting_dims.emplace_back(); + mapping.rhs_non_contracting_dims.back().lhs = -1; + mapping.rhs_non_contracting_dims.back().rhs = i; + mapping.rhs_non_contracting_dims.back().output = next_output_dim++; + } + auto create_sharded_dot = [&](HloInstruction* l, HloInstruction* r, + SpmdBuilder* b) -> StatusOr { + TF_ASSIGN_OR_RETURN( + auto sharded_dot_shape, + ShapeInference::InferDotOpShape(l->shape(), r->shape(), + hlo->dot_dimension_numbers())); + return b->AddInstruction(HloInstruction::CreateDot( + sharded_dot_shape, l, r, hlo->dot_dimension_numbers(), + hlo->precision_config())); + }; + return HandleDotHelper(hlo, mapping, create_sharded_dot); +} + +Status SpmdPartitioningVisitor::HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { + const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); + const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); + + // Similar to hlo_sharding_util::TransposeSharding(), but allows + // removing/adding non-partitioned dimensions. 
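For reference, a sketch of the mapping HandleDot builds for a plain batched matmul with hypothetical shapes lhs [b, m, k], rhs [b, k, n], output [b, m, n]; -1 marks a dimension that does not exist on that operand, which is exactly what the transpose helper below has to skip or add.

  #include <cstdio>

  int main() {
    // batch dims:              {lhs = 0,  rhs = 0,  output = 0}
    // contracting dims (k):    {lhs = 2,  rhs = 1,  output = -1}
    // lhs non-contracting (m): {lhs = 1,  rhs = -1, output = 1}
    // rhs non-contracting (n): {lhs = -1, rhs = 2,  output = 2}
    const int lhs_to_output_indices[] = {0, 1, -1};  // k has no output dim
    const int output_to_lhs_indices[] = {0, 1, -1};  // n has no lhs dim
    for (int i = 0; i < 3; ++i) {
      std::printf("lhs %d -> output %d, output %d -> lhs %d\n", i,
                  lhs_to_output_indices[i], i, output_to_lhs_indices[i]);
    }
  }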
+ auto transpose_sharding = + [&](const HloSharding& source, absl::Span src_to_tgt, + absl::Span tgt_to_src) -> absl::optional { + if (source.IsTileMaximal()) { + return source; + } + std::vector tgt_dims_skipping_new(tgt_to_src.size(), -1); + int64 skipped_tgt_dims = 0; + for (int64 i = 0; i < tgt_to_src.size(); ++i) { + if (tgt_to_src[i] < 0) { + skipped_tgt_dims++; + } else { + tgt_dims_skipping_new[i] = i - skipped_tgt_dims; + } + } + int64 skipped_src_dims = absl::c_count(src_to_tgt, -1); + std::vector perm(src_to_tgt.size()); + for (int64 i = 0; i < src_to_tgt.size(); ++i) { + if (src_to_tgt[i] < 0) { + if (source.tile_assignment().dim(i) > 1) { + return absl::nullopt; + } + perm[src_to_tgt.size() - skipped_src_dims] = i; + skipped_src_dims--; + } else { + perm[tgt_dims_skipping_new[src_to_tgt[i]]] = i; + } + } + auto tgt_sharding = hlo_sharding_util::TransposeSharding(source, perm); + if (skipped_tgt_dims == 0) { + return tgt_sharding; + } + auto reshape_tiles = tgt_sharding.tile_assignment(); + std::vector tgt_tiles(tgt_to_src.size(), 1); + for (int64 i = 0; i < tgt_tiles.size(); ++i) { + if (tgt_to_src[i] >= 0) { + tgt_tiles[i] = reshape_tiles.dim(tgt_dims_skipping_new[i]); + } + } + reshape_tiles.Reshape(tgt_tiles); + return HloSharding::Tile(reshape_tiles); + }; + + std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); + std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); + std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); + std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); + std::vector output_to_lhs_indices(hlo->shape().rank(), -1); + std::vector output_to_rhs_indices(hlo->shape().rank(), -1); + auto populate_indices_mapping = + [&](const DotGeneralDimsMapping::DimsMapping& mapping) { + if (mapping.lhs >= 0) { + lhs_to_rhs_indices[mapping.lhs] = mapping.rhs; + lhs_to_output_indices[mapping.lhs] = mapping.output; + } + if (mapping.rhs >= 0) { + rhs_to_lhs_indices[mapping.rhs] = mapping.lhs; + rhs_to_output_indices[mapping.rhs] = mapping.output; + } + if (mapping.output >= 0) { + output_to_lhs_indices[mapping.output] = mapping.lhs; + output_to_rhs_indices[mapping.output] = mapping.rhs; + } + }; + for (const auto& mapping : dims_mapping.batch_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.contracting_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.lhs_non_contracting_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.rhs_non_contracting_dims) { + populate_indices_mapping(mapping); + } + auto lhs_sharding_transposed_to_match_rhs = + transpose_sharding(lhs_sharding, lhs_to_rhs_indices, rhs_to_lhs_indices); + auto rhs_sharding_transposed_to_match_lhs = + transpose_sharding(rhs_sharding, rhs_to_lhs_indices, lhs_to_rhs_indices); + auto lhs_sharding_transposed_to_match_output = transpose_sharding( + lhs_sharding, lhs_to_output_indices, output_to_lhs_indices); + auto rhs_sharding_transposed_to_match_output = transpose_sharding( + rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); + auto output_sharding_transposed_to_match_lhs = transpose_sharding( + hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); + auto output_sharding_transposed_to_match_rhs = transpose_sharding( + hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); + + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
+ auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + const int64 lhs_batch_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); + const int64 rhs_batch_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); + const int64 output_batch_partitions = + get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); + const int64 lhs_contracting_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); + const int64 rhs_contracting_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); + const int64 lhs_non_contracting_partitions = get_partitions_for_dims( + lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); + const int64 rhs_non_contracting_partitions = get_partitions_for_dims( + rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); + const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); + const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); + + auto& lhs = GetPartitionedHlo(hlo->operand(0)); + auto& rhs = GetPartitionedHlo(hlo->operand(1)); + // LHS and RHS are partitioned the same way and only partitioned in batch + // dimensions. + if (lhs_batch_partitions == rhs_batch_partitions && + rhs_batch_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_rhs == rhs_sharding) { + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + dot->set_sharding(*lhs_sharding_transposed_to_match_output); + return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + + // Try emit batch-partitioned einsum with one operand resharded. Returns + // whether the attempt succeeds. If may_reshard_with_allreduce is false, + // reshard must be done using all-to-all; otherwise this attempt fails. + auto try_emit_output_batch_partitioned_einsum_with_reshard = + [&](bool may_reshard_with_allreduce) -> StatusOr { + // LHS and output are batch partitioned in the same way. + if (lhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (!may_reshard_with_allreduce && + !CanReshardWithAllToAll(rhs.sharding(), + *lhs_sharding_transposed_to_match_rhs)) { + return false; + } + auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); + TF_ASSIGN_OR_RETURN( + auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; + } + // RHS and output are batch partitioned in the same way. 
+ if (rhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (!may_reshard_with_allreduce && + !CanReshardWithAllToAll(lhs.sharding(), + *rhs_sharding_transposed_to_match_lhs)) { + return false; + } + auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); + TF_ASSIGN_OR_RETURN( + auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; + } + return false; + }; + + { + // Try batch-parallel by resharding one operand, and not using all-reduce. + TF_ASSIGN_OR_RETURN( + bool emitted, + try_emit_output_batch_partitioned_einsum_with_reshard(false)); + if (emitted) { + return Status::OK(); + } + } + + // Try to emit windowed DotGeneral when one operand is partitioned in the same + // way as the output along non-contracting dimensions, but the other operand + // is tiled in other dimensions. + auto emit_windowed_dot_general = [&](int64 matching_operand, + int64 windowing_operand, + bool windowed_at_contracting_dims, + bool windowed_at_batch_dims) { + CHECK_EQ(matching_operand + windowing_operand, 1); + CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); + auto unpadded_result_buffer_shape = + MakePartitionedShape(hlo->shape(), hlo->sharding()); + auto padded_result_buffer_shape = unpadded_result_buffer_shape; + // For windowing at batch/non-contracting dims, we produce the result one + // partition at a time, so we need to pad the shape in case of uneven + // partitioning in order to make dynamic-update-slice in-bound. + if (!windowed_at_contracting_dims) { + padded_result_buffer_shape = GetPaddedShapeForUnevenPartitioning( + padded_result_buffer_shape, + windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output + : *rhs_sharding_transposed_to_match_output); + } + // Mask the padding area of the windowed operand with zero if there is + // uneven partitioning. + if (windowed_at_contracting_dims) { + auto& to_mask = windowing_operand == 0 ? lhs : rhs; + to_mask = + to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type())))); + } + auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); + auto iteration = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); + + // Create a while loop that computes one window per iteration. During each + // iteration, each partition sends its input window to its neighbor using + // collective-permute for the next iteration. 
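A standalone sketch (hypothetical partition count) of the rotation schedule the loop body below sets up: at iteration i, partition p works on the slice that originally lived on partition (p + i) mod n, and the collective-permute then shifts each window from partition s to s - 1 so the next iteration sees the next slice.

  #include <cstdio>

  int main() {
    const int num_partitions = 4;
    for (int i = 0; i < num_partitions; ++i) {
      for (int p = 0; p < num_partitions; ++p) {
        // Matches data_partition_id = (i + partition_id) % num_partitions.
        int data_partition_id = (i + p) % num_partitions;
        std::printf("iter %d: partition %d computes slice %d\n", i, p,
                    data_partition_id);
      }
      // The shift matches the source->destination pairs {s, (s - 1 + n) % n}
      // used for the collective-permute feeding the next iteration.
    }
  }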
+ SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); + auto param = body_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, + ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), + result_buffer->shape(), iteration->shape()}), + "param")); + auto l = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(lhs.hlo()->shape(), param, 0)); + auto r = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(rhs.hlo()->shape(), param, 1)); + auto o = body_b.AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), param, 2)); + auto i = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); + + auto partition_id = collective_ops_creator_.create_partition_id(&body_b); + auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kAdd, i, partition_id)); + auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))); + data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); + auto dot_lhs = l; + auto dot_rhs = r; + if (windowed_at_contracting_dims || windowed_at_batch_dims) { + // Slice the matching operand according to the partitioned contracting + // dimensions on the windowed operand. We do this by treating the matching + // operand as replicated, and resharding it to match the windowed operand. + auto slice_operand = matching_operand == 0 ? l : r; + slice_operand->set_sharding(HloSharding::Replicate()); + auto state = MakePartitioningState(); + state.b = &body_b; + state.partition_id = data_partition_id; + auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) + .Reshard(windowing_operand == 0 + ? *lhs_sharding_transposed_to_match_rhs + : *rhs_sharding_transposed_to_match_lhs) + .hlo(); + slice_operand->clear_sharding(); + if (matching_operand == 0) { + dot_lhs = slice; + } else { + dot_rhs = slice; + } + } + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(dot_lhs, dot_rhs, &body_b)); + if (windowed_at_contracting_dims) { + // Accumulate the partial output to the result buffer. + o = body_b.AddInstruction( + HloInstruction::CreateBinary(o->shape(), HloOpcode::kAdd, o, dot)); + } else { + // The windowing operand is partitioned along batch/non-contracting + // dimensions, so we need a dynamic-update-slice to save the partial + // output in the result buffer. + auto offsets = MakePartitionOffsets( + o->shape(), + windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output + : *rhs_sharding_transposed_to_match_output, + data_partition_id, &body_b); + o = body_b.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + o->shape(), o, dot, offsets)); + } + + // ++i + i = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kAdd, i, + body_b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1))))); + auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), i, + body_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))), + ComparisonDirection::kLt)); + // Collective-permute for the next window. We don't need it for the last + // iteration, so we use a conditional around the collective-permute. 
+ HloInstruction* conditional; + { + SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); + { + auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( + 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); + std::vector> sd_pairs(num_partitions_); + for (int64 source = 0; source < num_partitions_; ++source) { + // 0 -> n-1, 1 -> 0, 2 -> 1, ... + sd_pairs[source] = {source, + (source - 1 + num_partitions_) % num_partitions_}; + } + collective_ops_creator_.create_cross_partition_collective_permute( + &cp_b, p, sd_pairs, (*next_channel_id_)++); + } + SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); + { + ncp_b.AddInstruction(HloInstruction::CreateParameter( + 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); + } + conditional = body_b.AddInstruction(HloInstruction::CreateConditional( + windowing_operand == 0 ? l->shape() : r->shape(), has_more, + windowing_operand == 0 ? l : r, + module_->AddEmbeddedComputation(cp_b.Build()), + windowing_operand == 0 ? l : r, + module_->AddEmbeddedComputation(ncp_b.Build()))); + } + if (windowing_operand == 0) { + l = conditional; + } else { + r = conditional; + } + body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); + + SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); + auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, + ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), + result_buffer->shape(), iteration->shape()}), + "param")); + auto cond_i = cond_b.AddInstruction(HloInstruction::CreateGetTupleElement( + iteration->shape(), cond_param, 3)); + cond_b.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), cond_i, + cond_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))), + ComparisonDirection::kLt)); + auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( + cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), + module_->AddEmbeddedComputation(body_b.Build()), + b_.AddInstruction(HloInstruction::CreateTuple( + {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); + windowed_dot_general_loops_.push_back({while_loop, windowing_operand, + windowed_at_contracting_dims, + windowed_at_batch_dims}); + SetPartitionedHlo(hlo, [&] { + auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), while_loop, 2)); + if (!ShapeUtil::Compatible(padded_result_buffer_shape, + unpadded_result_buffer_shape)) { + result = b_.AddInstruction(HloInstruction::CreateSlice( + unpadded_result_buffer_shape, result, + std::vector(padded_result_buffer_shape.rank(), 0), + unpadded_result_buffer_shape.dimensions(), + std::vector(padded_result_buffer_shape.rank(), 1))); + } + return result; + }); + return Status::OK(); + }; + if (output_lhs_non_contracting_partitions == num_partitions_ && + output_sharding_transposed_to_match_lhs == lhs_sharding && + ShapeUtil::ByteSizeOf(hlo->operand(1)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (rhs_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, true, false); + } + if (rhs_non_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, false, false); + } + if (rhs_batch_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, false, true); + } + } + if (output_rhs_non_contracting_partitions == num_partitions_ && + output_sharding_transposed_to_match_rhs == 
rhs_sharding && + ShapeUtil::ByteSizeOf(hlo->operand(0)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (lhs_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, true, false); + } + if (lhs_non_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, false, false); + } + if (lhs_batch_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, false, true); + } + } + + { + // Try batch-parallel by resharding one operand, and allowing all-reduce. + TF_ASSIGN_OR_RETURN( + bool emitted, + try_emit_output_batch_partitioned_einsum_with_reshard(true)); + if (emitted) { + return Status::OK(); + } + } + + // LHS and RHS have the same partitioned contracting dimensions. + if (lhs_contracting_partitions == rhs_contracting_partitions && + lhs_contracting_partitions == num_partitions_) { + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + // Pad both sides with zero, since NaN at one side cannot be masked by zero + // on the other side. + if (ShapeUtil::ByteSizeOf(lhs.base_shape()) < + ShapeUtil::ByteSizeOf(rhs.base_shape())) { + lhs = + lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } else { + lhs = lhs.PadWithValue(zero); + rhs = + rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); + } + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + + // LHS and output have the same partitioned non-contracting dimensions. + if (lhs_non_contracting_partitions == num_partitions_ && + output_lhs_non_contracting_partitions == num_partitions_ && + lhs_sharding == hlo->sharding()) { + auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + + // RHS and output have the same partitioned non-contracting dimensions. + if (rhs_non_contracting_partitions == num_partitions_ && + output_rhs_non_contracting_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { + auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + + // Output is batch partitioned. + if (output_batch_partitions == num_partitions_) { + auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); + auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), + resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + // Output is partitioned along LHS non-contracting dimensions. 
+ if (output_lhs_non_contracting_partitions == num_partitions_) { + auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); + auto replicated_rhs = rhs.Reshard(HloSharding::Replicate()); + TF_ASSIGN_OR_RETURN( + auto dot, + create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + // Output is partitioned along RHS non-contracting dimensions. + if (output_rhs_non_contracting_partitions == num_partitions_) { + auto replicated_lhs = lhs.Reshard(HloSharding::Replicate()); + auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(), + resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + + // Returns true if it is beneficial to reshard the operand at `operand_idx` + // across the contracting dimension. + const auto should_partition_contracting_dim = [&](int64 operand_idx) { + if (!hlo->sharding().IsReplicated()) { + return false; + } + + if (operand_idx == 0) { + // If LHS and output are replicated, we compare the cost of all-gather + // on RHS vs all-reduce on the output. + return (rhs_contracting_partitions == num_partitions_) && + lhs.sharding().IsReplicated() && + ShapeUtil::ElementsIn(hlo->operand(1)->shape()) > + ShapeUtil::ElementsIn(hlo->shape()); + } else { + return (lhs_contracting_partitions == num_partitions_) && + rhs.sharding().IsReplicated() && + ShapeUtil::ElementsIn(hlo->operand(0)->shape()) > + ShapeUtil::ElementsIn(hlo->shape()); + } + }; + + // When the output is replicated and one of the operands is partitioned along + // contracting dimension, align the other operand to be partitioned along + // the contracting dimensions. + if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) || + should_partition_contracting_dim(1))) { + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + if (should_partition_contracting_dim(0)) { + lhs = + lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } else { + lhs = lhs.PadWithValue(zero); + rhs = + rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); + } + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo(); + }); + return Status::OK(); + } + + return DefaultAction(hlo); +} + +namespace { + +// Finds a cluster of nodes that produce the inputs for `hlo` which only depend +// on small operands, which means the cluster should start with broadcasts, +// constants and iotas. All other internal nodes must be non-side-effecting +// elemntwise ops. Returns the set of nodes, and the small operands. E.g., for +// the following graph, +// +// a -> broadcast -> multiply +// iota ---> add--/ +// constant/ +// +// FindInputNodesIfOnlyDependOnSmallOperands(multiply) will return +// <{broadcast, iota, constant, add, multiply}, [a]>. 
+std::pair, std::vector> +FindInputNodesIfOnlyDependOnSmallOperands(HloInstruction* hlo) { + std::unordered_set nodes_found; + std::vector new_operands; + std::unordered_set new_operands_set; + std::vector worklist; + worklist.push_back(hlo); + while (!worklist.empty()) { + auto inst = worklist.back(); + worklist.pop_back(); + if (nodes_found.count(inst) > 0) { + continue; + } + if (inst->opcode() == HloOpcode::kBroadcast || + inst->opcode() == HloOpcode::kConstant || + inst->opcode() == HloOpcode::kIota) { + nodes_found.insert(inst); + for (auto o : inst->operands()) { + auto res = new_operands_set.emplace(o); + if (res.second) { + new_operands.push_back(o); + } + } + } else if (inst->IsElementwise() && !inst->HasSideEffectNoRecurse() && + inst->opcode() != HloOpcode::kAllReduce && + absl::c_all_of(inst->operands(), + [inst](const HloInstruction* o) { + return ShapeUtil::CompatibleIgnoringElementType( + o->shape(), inst->shape()); + })) { + nodes_found.insert(inst); + for (auto o : inst->operands()) { + worklist.push_back(o); + } + } else { + nodes_found.clear(); + new_operands.clear(); + break; + } + } + return {std::move(nodes_found), std::move(new_operands)}; +} + +// Moves a cluster of memory-reducing nodes into the windowed dot-general loop +// on contracting dimensions. Such a loop has a dynamic slice on the +// non-windowed operand. If we move the input nodes into the loop, the +// dynamic-slice could be merged with them by later optimization passes, which +// reduces memory. +// +// small_operands small_operands +// | | +// input_nodes loop { | +// | => input_nodes +// loop { | | +// dynamic-slice dynamic-slice +// ... ... +// } } +// +// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice +// with the input nodes. +Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( + HloInstruction* loop, int64 non_windowed_operand_index) { + auto input_tuple = loop->mutable_operand(0); + auto old_operand = input_tuple->mutable_operand(non_windowed_operand_index); + auto input_nodes = FindInputNodesIfOnlyDependOnSmallOperands(old_operand); + auto to_sink = std::move(input_nodes.first); + auto new_operands = std::move(input_nodes.second); + if (to_sink.empty()) { + return Status::OK(); + } + auto computation = loop->parent(); + // Replace the old operand with a tuple of the found small operands. + auto new_input_subtuple = + computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); + TF_RETURN_IF_ERROR(input_tuple->ReplaceOperandWithDifferentShape( + non_windowed_operand_index, new_input_subtuple)); + + auto body = loop->while_body(); + auto body_param = body->parameter_instruction(0); + auto old_body_param_users = body_param->users(); + // Update all tuple shapes. + for (auto tuple : std::vector{ + input_tuple, loop, loop->while_condition()->parameter_instruction(0), + body_param, body->root_instruction()}) { + *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), + {non_windowed_operand_index}) = + new_input_subtuple->shape(); + } + // Now update the loop body. + auto new_operand_tuple_inside = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_input_subtuple->shape(), body_param, non_windowed_operand_index)); + TF_RETURN_IF_ERROR(body->root_instruction()->ReplaceOperandWithDifferentShape( + non_windowed_operand_index, new_operand_tuple_inside)); + + // Create nodes inside the loop body. 
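+  // The cloning below is worklist driven: an outside node is cloned into the
+  // loop body only after all of its operands have inside versions recorded in
+  // outside_to_inside, starting from the elements of the new input subtuple
+  // and the nullary ops (constants, iotas) in the cluster.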
+ std::vector worklist; + std::unordered_map outside_to_inside; + auto add_users_if_available = [&](HloInstruction* inst) { + for (auto u : inst->users()) { + if (outside_to_inside.count(u) == 0 && to_sink.count(u) > 0 && + absl::c_all_of(u->operands(), [&](const HloInstruction* o) { + return outside_to_inside.count(o) > 0; + })) { + worklist.push_back(u); + } + } + }; + for (int64 i = 0; i < new_operands.size(); ++i) { + outside_to_inside[new_operands[i]] = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_operands[i]->shape(), new_operand_tuple_inside, i)); + add_users_if_available(new_operands[i]); + } + // HLOs to sink without operands. + std::vector nullaries_to_sink; + for (auto inst : to_sink) { + if (inst->operand_count() == 0) { + nullaries_to_sink.push_back(inst); + } + } + // Sort nullaries_to_sink to make it deterministic. + absl::c_sort(nullaries_to_sink, + [](const HloInstruction* a, const HloInstruction* b) { + return a->unique_id() < b->unique_id(); + }); + for (auto inst : nullaries_to_sink) { + worklist.push_back(inst); + } + while (!worklist.empty()) { + auto inst = worklist.back(); + worklist.pop_back(); + std::vector inst_new_operands(inst->operand_count()); + for (int64 i = 0; i < inst->operand_count(); ++i) { + inst_new_operands[i] = outside_to_inside[inst->operand(i)]; + } + outside_to_inside[inst] = body->AddInstruction( + inst->CloneWithNewOperands(inst->shape(), inst_new_operands)); + add_users_if_available(inst); + } + TF_RET_CHECK(outside_to_inside.count(old_operand) > 0); + for (auto ou : old_body_param_users) { + if (ou->opcode() == HloOpcode::kGetTupleElement && + ou->tuple_index() == non_windowed_operand_index) { + TF_RETURN_IF_ERROR( + ou->ReplaceAllUsesWith(outside_to_inside[old_operand])); + TF_RETURN_IF_ERROR(body->RemoveInstruction(ou)); + } + } + return Status::OK(); +} + +// Moves a cluster of memory-reducing nodes (with reduce nodes at the end) into +// the windowed dot-general loop on non-contracting dimensions. Such a loop has +// a dynamic-update-slice at the output. If we move the user nodes into the loop +// and before the dynamic-update-slice, the user nodes can operate on smaller +// shapes, which reduces memory. +// +// small_operands small_operands +// | | => | | +// | | loop { loop { | | +// | | conv | broadcast conv +// | | | | | / +// | | dynamic-update-slice | dynamic-slice / +// | | | | | / +// | | } | | multiply----- +// |broadcast / | / +// | | / reduce +// |multiply-- | +// \ | dynamic-update-slice +// reduce } +// +// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice +// with the input nodes (broadcast). +Status MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( + HloInstruction* loop) { + CHECK_EQ(loop->user_count(), 1); + // There should be a single direct user of the while loop, which is the + // gte for element 2, i.e., the dot output. + auto user_gte = loop->users().front(); + CHECK_EQ(user_gte->opcode(), HloOpcode::kGetTupleElement); + CHECK_EQ(user_gte->tuple_index(), 2); + auto computation = loop->parent(); + + // Find the reduce outputs and the input nodes they depend on, if input nodes + // only have small operands. 
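+  // Only reduces whose to_apply computation is a single two-parameter
+  // elementwise root qualify, e.g. (illustrative HLO):
+  //   sum { a = f32[] parameter(0)
+  //         b = f32[] parameter(1)
+  //         ROOT add = f32[] add(a, b) }
+  // since such a reduction can be re-applied safely to accumulate partial
+  // results across loop iterations.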
+  std::unordered_set<HloInstruction*> to_move;
+  std::vector<HloInstruction*> new_operands;
+  std::unordered_set<HloInstruction*> new_operands_set;
+  std::vector<HloInstruction*> reduce_outputs;
+  std::vector<HloInstruction*> worklist;
+  Shape padded_shape = user_gte->shape();
+  Shape unpadded_shape = user_gte->shape();
+  auto original_output = user_gte;
+
+  if (user_gte->user_count() == 1 &&
+      user_gte->users().back()->opcode() == HloOpcode::kSlice) {
+    original_output = user_gte->users().back();
+    unpadded_shape = original_output->shape();
+  }
+  for (auto u : original_output->users()) {
+    worklist.push_back(u);
+  }
+  to_move.insert(original_output);
+  while (!worklist.empty()) {
+    auto inst = worklist.back();
+    worklist.pop_back();
+    if (to_move.count(inst) > 0) {
+      continue;
+    }
+    // We only support reduces with a simple reduction function, since we may
+    // need to accumulate across iterations manually.
+    if (inst->opcode() == HloOpcode::kReduce &&
+        inst->to_apply()->instruction_count() == 3 &&
+        inst->to_apply()->num_parameters() == 2 &&
+        inst->to_apply()->root_instruction()->IsElementwise()) {
+      to_move.insert(inst);
+      auto other_operand = inst->mutable_operand(1);
+      auto res = new_operands_set.emplace(other_operand);
+      if (res.second) {
+        new_operands.push_back(other_operand);
+      }
+      reduce_outputs.push_back(inst);
+    } else if (inst != computation->root_instruction() &&
+               inst->user_count() > 0 && inst->IsElementwise() &&
+               !inst->HasSideEffectNoRecurse() &&
+               inst->opcode() != HloOpcode::kAllReduce &&
+               absl::c_all_of(inst->operands(),
+                              [inst](const HloInstruction* o) {
+                                return ShapeUtil::CompatibleIgnoringElementType(
+                                    o->shape(), inst->shape());
+                              })) {
+      // For an elementwise op, we need to make sure that it depends only on
+      // nodes already in to_move and nodes with small operands.
+      bool can_include = true;
+      for (auto operand : inst->operands()) {
+        if (to_move.count(operand) > 0) {
+          continue;
+        }
+        auto find_result = FindInputNodesIfOnlyDependOnSmallOperands(operand);
+        if (find_result.first.empty()) {
+          can_include = false;
+          break;
+        }
+        for (auto n : find_result.first) {
+          to_move.insert(n);
+        }
+        for (auto new_operand : find_result.second) {
+          auto res = new_operands_set.insert(new_operand);
+          if (res.second) {
+            new_operands.push_back(new_operand);
+          }
+        }
+      }
+      if (!can_include) {
+        to_move.clear();
+        break;
+      }
+      to_move.insert(inst);
+      for (auto u : inst->users()) {
+        worklist.push_back(u);
+      }
+    } else {
+      to_move.clear();
+      break;
+    }
+  }
+  // If nothing is found, to_move may contain only original_output, or it may
+  // have been cleared by the code above.
+  if (to_move.size() <= 1) {
+    return Status::OK();
+  }
+
+  // We will replace the original loop output with reduce-shape outputs. Create
+  // the initial buffers before the loop.
+  for (auto out : reduce_outputs) {
+    auto padded_out_shape = out->shape();
+    int64 operand_dim = 0;
+    int64 output_dim = 0;
+    while (output_dim < padded_out_shape.rank()) {
+      if (absl::c_linear_search(out->dimensions(), operand_dim)) {
+        // Dimension collapsed.
+        ++operand_dim;
+        continue;
+      }
+      // Kept dimensions have the same size as the padded shape.
+ padded_out_shape.set_dimensions(output_dim, + padded_shape.dimensions(operand_dim)); + ++operand_dim; + ++output_dim; + } + auto broadcast = + computation->AddInstruction(HloInstruction::CreateBroadcast( + padded_out_shape, + computation->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(out->shape().element_type()))), + {})); + new_operands.push_back(broadcast); + } + + auto input_tuple = loop->mutable_operand(0); + // Create the new input subtuple that contains the small operands and the + // reduce-shape result buffers. + auto new_input_subtuple = + computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); + TF_RETURN_IF_ERROR( + input_tuple->ReplaceOperandWithDifferentShape(2, new_input_subtuple)); + auto body = loop->while_body(); + auto body_param = body->parameter_instruction(0); + auto body_root = body->root_instruction(); + CHECK_EQ(body_root->opcode(), HloOpcode::kTuple); + // Update tuple shapes. + for (auto tuple : std::vector{ + input_tuple, loop, loop->while_condition()->parameter_instruction(0), + body_param, body_root}) { + *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), {2}) = + new_input_subtuple->shape(); + } + auto new_loop_input = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_input_subtuple->shape(), body_param, 2)); + + // Now create the moved nodes inside the loop body. + std::unordered_map outside_to_inside; + worklist.clear(); + auto add_users_if_available = [&](HloInstruction* inst) { + for (auto u : inst->users()) { + if (outside_to_inside.count(u) == 0 && to_move.count(u) > 0 && + absl::c_all_of(u->operands(), [&](const HloInstruction* o) { + return outside_to_inside.count(o) > 0; + })) { + worklist.push_back(u); + } + } + }; + for (int64 i = 0; i < new_operands.size(); ++i) { + outside_to_inside[new_operands[i]] = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_operands[i]->shape(), new_loop_input, i)); + add_users_if_available(new_operands[i]); + } + // The elementwise nodes will be created with sliced shape. The original loop + // output corresponds to the dynamic-update-slice's update slice. + auto dus = body_root->mutable_operand(2); + CHECK_EQ(dus->opcode(), HloOpcode::kDynamicUpdateSlice); + outside_to_inside[original_output] = dus->mutable_operand(1); + add_users_if_available(original_output); + std::vector slice_offsets(padded_shape.rank()); + for (int64 i = 0; i < slice_offsets.size(); ++i) { + slice_offsets[i] = dus->mutable_operand(i + 2); + } + auto get_slice = [&](HloInstruction* padded) { + return body->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::ChangeElementType(dus->operand(1)->shape(), + padded->shape().element_type()), + padded, slice_offsets, dus->operand(1)->shape().dimensions())); + }; + // Helper functions to create nodes with small operands. 
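+  // Broadcasts, iotas and constants are cheap to recompute, so rather than
+  // keeping their full-shape values alive outside the loop, they are
+  // re-created at the padded shape inside the body and then narrowed to the
+  // current iteration's window via get_slice above.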
+  auto add_broadcast = [&](const HloInstruction* broadcast) {
+    auto padded_operand_shape = broadcast->operand(0)->shape();
+    for (int64 i = 0; i < broadcast->dimensions().size(); ++i) {
+      padded_operand_shape.set_dimensions(
+          i, padded_shape.dimensions(broadcast->dimensions(i)));
+    }
+    auto padded_operand = PadToShape(outside_to_inside[broadcast->operand(0)],
+                                     padded_operand_shape, nullptr, body);
+    outside_to_inside[broadcast] =
+        get_slice(body->AddInstruction(broadcast->CloneWithNewOperands(
+            ShapeUtil::ChangeElementType(padded_shape,
+                                         padded_operand_shape.element_type()),
+            {padded_operand})));
+  };
+  auto add_iota = [&](const HloInstruction* iota) {
+    outside_to_inside[iota] =
+        get_slice(body->AddInstruction(iota->CloneWithNewOperands(
+            ShapeUtil::ChangeElementType(padded_shape,
+                                         iota->shape().element_type()),
+            {})));
+  };
+  auto add_constant = [&](const HloInstruction* constant) {
+    outside_to_inside[constant] = body->AddInstruction(constant->Clone());
+    outside_to_inside[constant] = get_slice(
+        PadToShape(outside_to_inside[constant],
+                   ShapeUtil::ChangeElementType(
+                       padded_shape, constant->shape().element_type()),
+                   nullptr, body));
+  };
+  while (!worklist.empty()) {
+    auto inst = worklist.back();
+    worklist.pop_back();
+    if (outside_to_inside.count(inst) > 0) {
+      continue;
+    }
+    if (inst->opcode() == HloOpcode::kBroadcast) {
+      add_broadcast(inst);
+    } else if (inst->opcode() == HloOpcode::kIota) {
+      add_iota(inst);
+    } else if (inst->opcode() == HloOpcode::kConstant) {
+      add_constant(inst);
+    } else if (inst->opcode() == HloOpcode::kReduce) {
+      // This is an output, for which we have special handling later.
+    } else {
+      std::vector<HloInstruction*> operands_inside(inst->operand_count());
+      for (int64 i = 0; i < operands_inside.size(); ++i) {
+        operands_inside[i] = outside_to_inside[inst->operand(i)];
+      }
+      outside_to_inside[inst] = body->AddInstruction(inst->CloneWithNewOperands(
+          ShapeUtil::ChangeElementType(dus->operand(1)->shape(),
+                                       inst->shape().element_type()),
+          operands_inside));
+    }
+    add_users_if_available(inst);
+  }
+  std::vector<HloInstruction*> new_outputs_inside(new_operands.size());
+  for (int64 i = 0; i < new_outputs_inside.size(); ++i) {
+    new_outputs_inside[i] = outside_to_inside[new_operands[i]];
+  }
+  // Now create the reduce outputs inside the loop.
+  for (int64 i = 0; i < reduce_outputs.size(); ++i) {
+    auto reduce_outside = reduce_outputs[i];
+    CHECK_EQ(reduce_outside->opcode(), HloOpcode::kReduce);
+    int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i;
+    auto last_iter_result = outside_to_inside[new_operands[index_in_operand]];
+    auto operand0 = outside_to_inside[reduce_outside->operand(0)];
+    auto operand1 = outside_to_inside[reduce_outside->operand(1)];
+    TF_ASSIGN_OR_RETURN(auto reduce_shape,
+                        ShapeInference::InferReduceShape(
+                            {&operand0->shape(), &operand1->shape()},
+                            reduce_outside->dimensions(),
+                            reduce_outside->to_apply()->ComputeProgramShape()));
+    *reduce_shape.mutable_layout() = reduce_outside->shape().layout();
+    std::vector<HloInstruction*> reduce_dus_offsets;
+    // If any collapsed dimension is windowed, we need to accumulate with last
+    // iteration's result. If such a dimension has padding, we also need to
+    // mask off invalid data.
+ bool needs_accumulate = false; + std::vector dims_to_mask; + for (int64 i = 0; i < slice_offsets.size(); ++i) { + if (absl::c_linear_search(reduce_outside->dimensions(), i)) { + if (reduce_outside->operand(0)->shape().dimensions(i) != + operand0->shape().dimensions(i)) { + needs_accumulate = true; + if (unpadded_shape.dimensions(i) != padded_shape.dimensions(i)) { + dims_to_mask.push_back(i); + } + } + continue; + } + reduce_dus_offsets.push_back(slice_offsets[i]); + } + // Mask off invalid data in collapsed dimensions. + for (int64 dim : dims_to_mask) { + auto iota = body->AddInstruction(HloInstruction::CreateIota( + ShapeUtil::ChangeElementType(operand0->shape(), S32), dim)); + auto add = body->AddInstruction(HloInstruction::CreateBinary( + iota->shape(), HloOpcode::kAdd, iota, + body->AddInstruction(HloInstruction::CreateBroadcast( + iota->shape(), slice_offsets[dim], {})))); + auto limit = body->AddInstruction(HloInstruction::CreateBroadcast( + iota->shape(), + body->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0( + reduce_outside->operand(0)->shape().dimensions(dim)))), + {})); + auto compare = body->AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::ChangeElementType(iota->shape(), PRED), add, limit, + ComparisonDirection::kLt)); + operand0 = body->AddInstruction(HloInstruction::CreateTernary( + operand0->shape(), HloOpcode::kSelect, compare, operand0, + body->AddInstruction(HloInstruction::CreateBroadcast( + operand0->shape(), operand1, {})))); + } + auto output_inside = + body->AddInstruction(reduce_outside->CloneWithNewOperands( + reduce_shape, {operand0, operand1})); + // Accumulate with previous results if needed. + if (needs_accumulate) { + auto input_slice = + body->AddInstruction(HloInstruction::CreateDynamicSlice( + output_inside->shape(), last_iter_result, reduce_dus_offsets, + output_inside->shape().dimensions())); + output_inside = body->AddInstruction(HloInstruction::CreateBinary( + output_inside->shape(), + reduce_outside->to_apply()->root_instruction()->opcode(), + output_inside, input_slice)); + } + // Dynamic-update-slice if needed. + if (!ShapeUtil::Compatible(output_inside->shape(), + last_iter_result->shape())) { + output_inside = + body->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + last_iter_result->shape(), last_iter_result, output_inside, + reduce_dus_offsets)); + } + new_outputs_inside[index_in_operand] = output_inside; + } + // Body output. + auto new_output_inside = + body->AddInstruction(HloInstruction::CreateTuple(new_outputs_inside)); + TF_RETURN_IF_ERROR( + body_root->ReplaceOperandWithDifferentShape(2, new_output_inside)); + TF_RETURN_IF_ERROR(body->RemoveInstructionAndUnusedOperands(dus)); + // Replace uses of the reduces outside the loop. 
+ auto new_output_gte = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + new_output_inside->shape(), loop, 2)); + for (int64 i = 0; i < reduce_outputs.size(); ++i) { + int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i; + auto new_output = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + new_outputs_inside[index_in_operand]->shape(), new_output_gte, + index_in_operand)); + if (!ShapeUtil::Compatible(new_output->shape(), + reduce_outputs[i]->shape())) { + new_output = computation->AddInstruction(HloInstruction::CreateSlice( + reduce_outputs[i]->shape(), new_output, + std::vector(new_output->shape().rank(), 0), + reduce_outputs[i]->shape().dimensions(), + std::vector(new_output->shape().rank(), 1))); + } + TF_RETURN_IF_ERROR(reduce_outputs[i]->ReplaceAllUsesWith(new_output)); + TF_RETURN_IF_ERROR( + computation->RemoveInstructionAndUnusedOperands(reduce_outputs[i])); + } + return Status::OK(); +} + +} // namespace + +Status SpmdPartitioningVisitor::DoCodeMotionForWindowedDotGeneralLoops( + HloComputation* computation) { + for (auto& loop : windowed_dot_general_loops_) { + if (loop.windowed_in_contracting_dims || loop.windowed_in_batch_dims) { + // We have a dynamic-slice for the non-windowed operand in + // batch/contracting-dim windowed dot-general. So moving the + // broadcast/iota/elementwise ops into the loop could help reduce memory + // via fusion. + TF_RETURN_IF_ERROR( + SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( + loop.while_loop, 1 - loop.windowed_operand)); + } + if (!loop.windowed_in_contracting_dims) { + // We have a dynamic-update-slice for the output in + // batch/non-contracting-dim windowed dot-general. So moving reduce ops + // into the loop could help reduce memory. + TF_RETURN_IF_ERROR( + MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( + loop.while_loop)); + } + } + return Status::OK(); +} + +StatusOr SpmdPartitioningVisitor::DoPartition( + HloComputation* computation, const HloSharding& root_sharding) { + VLOG(2) << "Partitioning computation " << computation->name() << " for " + << num_replicas_ << " replicas and " << num_partitions_ + << " partitions"; + TF_RETURN_IF_ERROR(computation->Accept(this)); + + HloModule* module = computation->parent(); + auto new_root = + GetPartitionedHlo(computation->root_instruction()).Reshard(root_sharding); + auto new_computation = + module->AddEmbeddedComputation(b_.Build(new_root.hlo())); + TF_RETURN_IF_ERROR(DoCodeMotionForWindowedDotGeneralLoops(new_computation)); + + // Replace the original computation with the new SPMD computation. 
+ std::unordered_map replacement; + replacement[computation] = new_computation; + module->ReplaceComputations(replacement); + return changed_; +} + +Status SpmdPartitioningVisitor::HandlePartitionId(HloInstruction* hlo) { + return Unimplemented( + "PartitionId instruction is not supported for SPMD partitioning since " + "the meaning is ambiguous -- whether the instruction is replicated or " + "the data is replicated, and if the latter which data is replicated."); +} + +SpmdPartitioner::SpmdPartitioner(int64 num_partitions, int64 num_replicas, + SpmdPartitionerOptions options) + : SpmdPartitioner( + num_partitions, num_replicas, std::move(options), + SPMDCollectiveOpsCreator{ + [](SpmdBuilder* b) { + return b->AddInstruction(HloInstruction::CreatePartitionId()); + }, + [num_replicas](SpmdBuilder* b, HloInstruction* operand, + HloComputation* reduction, int64 channel_id) { + return b->AddInstruction(HloInstruction::CreateAllReduce( + operand->shape(), {operand}, reduction, + CreateReplicaGroups(num_replicas), + /*constrain_layout=*/false, channel_id, + /*use_global_device_ids=*/false)); + }, + [](SpmdBuilder* b, HloInstruction* operand, + std::vector>& src_dst_pairs, + int64 channel_id) { + return b->AddInstruction( + HloInstruction::CreateCollectivePermute( + operand->shape(), operand, src_dst_pairs, channel_id)); + }, + [](SpmdBuilder* b, absl::Span operands, + const std::vector& replica_groups, + int64 channel_id, absl::optional split_dimension) { + std::vector shapes(operands.size(), + operands[0]->shape()); + const Shape output_shape = + (shapes.size() == 1) ? shapes[0] + : ShapeUtil::MakeTupleShape(shapes); + return b->AddInstruction(HloInstruction::CreateAllToAll( + output_shape, operands, replica_groups, + /*constrain_layout=*/false, channel_id, split_dimension)); + }, + }) {} + +StatusOr SpmdPartitioner::PartitionComputation( + HloComputation* computation, const HloSharding& root_sharding, + int64* next_channel_id, SpmdLogger* logger) { + auto visitor = + CreateVisitor(computation, num_partitions_, num_replicas_, + collective_ops_creator_, next_channel_id, logger, options_); + return visitor->DoPartition(computation, root_sharding); +} + +std::unique_ptr SpmdPartitioner::CreateVisitor( + HloComputation* computation, int64 num_partitions, int64 num_replicas, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdLogger* logger, + SpmdPartitionerOptions options) { + return absl::make_unique( + computation, num_partitions, num_replicas, collective_ops_creator, + next_channel_id, logger, std::move(options), this); +} + +StatusOr SpmdPartitioner::Run(HloModule* module) { + TF_RETURN_IF_ERROR(PreprocessSharding(module)); + + XLA_VLOG_LINES(1, SpmdLogger::ReportBeforePartition( + *module, options_.report_instruction_count)); + + // Add the parameters' and output's shardings to the module. 
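+  // Every entry parameter and the entry root must already carry a sharding
+  // annotation (checked below); they are recorded on the module via
+  // set_spmd_parameters_shardings and set_spmd_output_sharding before the
+  // computation is rewritten.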
+ std::vector entry_params_shardings; + for (int64 i = 0; i < module->entry_computation()->num_parameters(); ++i) { + auto param = module->entry_computation()->parameter_instruction(i); + CHECK(param->has_sharding()) << "Missing sharding in entry parameter " << i; + entry_params_shardings.push_back(param->sharding()); + } + module->set_spmd_parameters_shardings(entry_params_shardings); + auto entry_root = module->entry_computation()->root_instruction(); + CHECK(entry_root->has_sharding()) << "Missing sharding in entry root."; + module->set_spmd_output_sharding(entry_root->sharding()); + + FlattenCallGraph flatten; + TF_ASSIGN_OR_RETURN(auto changed, flatten.Run(module)); + + SpmdLogger logger(options_.report_instruction_count); + auto program_shape = module->entry_computation()->ComputeProgramShape(); + int64 next_channel_id = hlo_query::NextChannelId(*module); + TF_ASSIGN_OR_RETURN( + bool partition_changed, + PartitionComputation( + module->entry_computation(), + module->entry_computation()->root_instruction()->sharding(), + &next_channel_id, &logger)); + changed |= partition_changed; + + // For the entry computation, make sure that the root instruction and the + // parameters preserve their signatures. + auto new_program_shape = module->entry_computation()->ComputeProgramShape(); + if (!options_.allow_module_signature_change) { + TF_RET_CHECK(Shape::Equal().MinorToMajorOnlyInLayout()( + program_shape.result(), new_program_shape.result())) + << "Result shape changed for the entry computation"; + TF_RET_CHECK(program_shape.parameters_size() == + new_program_shape.parameters_size()) + << "Parameter count changed for the entry computation"; + for (int64 i = 0; i < program_shape.parameters_size(); ++i) { + TF_RET_CHECK(Shape::Equal().MinorToMajorOnlyInLayout()( + program_shape.parameters(i), new_program_shape.parameters(i))) + << "Parameter shape changed for the entry computation"; + } + } else { + const auto& old_entry_layout = module->entry_computation_layout(); + // Shapes can change but the layout should still remain the same. 
+ for (int64 i = 0; i < new_program_shape.parameters_size(); ++i) { + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + old_entry_layout.parameter_shape(i), + new_program_shape.mutable_parameters(i))); + } + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + old_entry_layout.result_shape(), new_program_shape.mutable_result())); + + HloModuleConfig config = module->config(); + *config.mutable_entry_computation_layout() = + ComputationLayout(new_program_shape, /*ignore_layouts=*/false); + module->set_config(config); + } + + XLA_VLOG_LINES(1, SpmdLogger::ReportAfterPartition( + *module, options_.report_instruction_count)); + XLA_VLOG_LINES(1, logger.MakeReport()); + + if (changed) { + HloPassPipeline pass("spmd-cleanup"); + pass.AddPass(); + pass.AddPass(); + pass.AddPass(/*is_layout_sensitive=*/true); + pass.AddPass(); + TF_RETURN_IF_ERROR(pass.Run(module).status()); + } + + TF_RETURN_IF_ERROR(ClearShardingAttributes(module)); + return changed; +} + +Status SpmdPartitioner::PreprocessSharding(HloModule* module) { + for (HloComputation* computation : module->computations()) { + for (HloInstruction* hlo : computation->instructions()) { + if (hlo->HasSideEffectNoRecurse() && hlo->opcode() != HloOpcode::kRng) { + TF_RET_CHECK(hlo->has_sharding()) + << "Side-effect HLO must have sharding: " << hlo->ToString(); + TF_RET_CHECK(!HasReplicatedSharding(hlo->sharding()) || + hlo->opcode() == HloOpcode::kInfeed) + << "Non-infeed side-effect HLO cannot have a replicated sharding:" + << hlo->ToString(); + } + + // For unassigned HLOs, annotate with replicated sharding. + // + // Among side-effecting ops, only Rng is allowed to omit the annotation. + // In that case, we currently force it to run on core 0, since we don't + // support partitioning or replicating the Rng op (the values depend on + // the seed provided to each device). + // + // TODO(hyouklee): Should we also convert single-device shardings (without + // side-effects) into replicated? + if (!hlo->has_sharding()) { + if (hlo->opcode() == HloOpcode::kRng) { + hlo->set_sharding(HloSharding::AssignDevice(0)); + } else { + hlo->set_sharding( + HloSharding::Single(hlo->shape(), HloSharding::Replicate())); + } + } else if (!hlo->sharding().IsTileMaximal()) { + std::vector available(num_partitions_); + std::iota(available.begin(), available.end(), 0); + TF_RET_CHECK(num_partitions_ == hlo_sharding_util::DevicesForSharding( + hlo->sharding(), available) + .size()) + << "num_partitions:" << num_partitions_ << "\n" + << "SPMD partitioner only supports tile sharding that includes all " + "partitions. If you didn't add this sharding annotation in the " + "model, please file a bug to XLA team.\n" + << hlo->ToString(); + } + } + } + + // Entry computation's parameter and root sharding must be either all + // replicated or all on a single device. 
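+  // e.g. sharding={replicated} or sharding={maximal device=0}. This
+  // constraint is only enforced when allow_module_signature_change is false,
+  // since a tiled entry sharding would change the parameter or result shapes.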
+ if (!options_.allow_module_signature_change) { + const HloComputation* entry = module->entry_computation(); + TF_RET_CHECK(entry->root_instruction()->has_sharding()); + const HloSharding& root_sharding = entry->root_instruction()->sharding(); + TF_RET_CHECK(root_sharding.IsReplicated() || + root_sharding.UniqueDevice().has_value()) + << "Unsupported entry root sharding: " << root_sharding.ToString(); + + for (const HloInstruction* param : entry->parameter_instructions()) { + TF_RET_CHECK(param->has_sharding()); + TF_RET_CHECK(param->sharding().IsReplicated() || + param->sharding().UniqueDevice().has_value()) + << "Unsupported entry parameter sharding:" + << param->sharding().ToString(); + } + } + + return Status::OK(); +} + +} // namespace spmd +} // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h new file mode 100644 index 00000000000..f22f564be73 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -0,0 +1,436 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_H_ + +#include +#include +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" + +namespace xla { +namespace spmd { + +struct SpmdPartitionerOptions { + // Always exchange halo on LHS for all convolutions. If false, backprop filter + // convolution exchanges halo on RHS. + bool conv_halo_exchange_always_on_lhs = true; + + // The number of instructions to be reported for the highest memory profile + // instructions. + int64 report_instruction_count = 5; + + // The minimum size in MiB of an einsum operand to be considered using + // windowed implementation in an HLO loop. + int64 threshold_for_windowed_einsum_mib = 256; + + // Whether the entry computations' signature could change after partitioning. + bool allow_module_signature_change = false; +}; + +// Class to wrap the computation builder to capture information during SPMD +// transformation. 
+class SpmdBuilder : public HloComputation::Builder { + public: + SpmdBuilder(const std::string& name, HloInstruction* hlo) + : HloComputation::Builder(name) { + visiting_hlo_ = hlo; + } + HloInstruction* AddInstruction(std::unique_ptr instruction); + + const std::vector& derived_instructions( + HloInstruction* hlo) { + return instructions_.at(hlo); + } + + void set_visiting_hlo(HloInstruction* hlo) { visiting_hlo_ = hlo; } + + HloInstruction* visiting_hlo() const { return visiting_hlo_; } + + private: + // Currently visiting instruction. + HloInstruction* visiting_hlo_; + + // Map from the currently visiting (old) instruction to new instructions + // created during SPMD partitioning. + HloInstructionMap> instructions_; +}; + +// A set of functions that create the cross-partition collective ops. +struct SPMDCollectiveOpsCreator { + // Function used to create a partition ID HLO. + std::function create_partition_id; + + // Function used to create a cross-partition all-reduce HLO. + std::function + create_cross_partition_all_reduce; + + // Function used to create a cross-partition collective-permute HLO. + std::function>& src_dst_pairs, + int64 next_channel_id)> + create_cross_partition_collective_permute; + + // Function used to create a cross-partition all-to-all HLO. + std::function operands, + const std::vector& replica_groups, int64 channel_id, + absl::optional split_dimension)> + create_cross_partition_all_to_all; +}; + +// Logger to report memory usage during SPMD partitioning. +class SpmdLogger { + public: + explicit SpmdLogger(int64 report_instruction_count) + : report_instruction_count_(report_instruction_count) {} + static std::string ReportBeforePartition(const HloModule& module, + int64 report_instruction_count); + static std::string ReportAfterPartition(const HloModule& module, + int64 report_instruction_count); + + // Registers the logging for the groups of instructions created to transform + // the given hlo. + void RegisterLogEntry(HloInstruction* hlo, + const std::vector& group); + + std::string MakeReport(); + + private: + template + static std::string ReportMemoryUsage(const HloModule& module, const F& filter, + int64 report_instruction_count); + + // A vector of logging messages (one for each original HLO instruction), where + // the first integer of the pair represents the size of the HBM used. + std::vector> entries_; + + int64 report_instruction_count_; +}; + +class SpmdPartitioningVisitor; + +class SpmdPartitioner : public HloModulePass { + public: + SpmdPartitioner(int64 num_partitions, int64 num_replicas, + SpmdPartitionerOptions options); + SpmdPartitioner(int64 num_partitions, int64 num_replicas, + SpmdPartitionerOptions options, + SPMDCollectiveOpsCreator collective_ops_creator) + : num_partitions_(num_partitions), + num_replicas_(num_replicas), + options_(std::move(options)), + collective_ops_creator_(std::move(collective_ops_creator)) {} + absl::string_view name() const override { return "spmd-partitioning"; } + StatusOr Run(HloModule* module) override; + + // Transforms the given computation with SPMD instructions, replacing it with + // a new computation. 
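+  // The pass is typically driven through HloModulePass::Run; a minimal sketch
+  // (the pipeline name and partition count below are illustrative):
+  //
+  //   SpmdPartitionerOptions options;
+  //   options.allow_module_signature_change = true;
+  //   HloPassPipeline pipeline("spmd-partitioning");
+  //   pipeline.AddPass<SpmdPartitioner>(/*num_partitions=*/8,
+  //                                     /*num_replicas=*/1, options);
+  //   TF_RETURN_IF_ERROR(pipeline.Run(module).status());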
+ StatusOr PartitionComputation(HloComputation* computation, + const HloSharding& root_sharding, + int64* next_channel_id, + SpmdLogger* logger); + + protected: + virtual std::unique_ptr CreateVisitor( + HloComputation* computation, int64 num_partitions, int64 num_replicas, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdLogger* logger, + SpmdPartitionerOptions options); + + private: + // Verify that the sharding of instructions in the module are valid, and also + // fill in missing sharding information. + Status PreprocessSharding(HloModule* module); + + const int64 num_partitions_; + const int64 num_replicas_; + + SpmdPartitionerOptions options_; + SPMDCollectiveOpsCreator collective_ops_creator_; +}; + +// Class describes partition state of the data represented by an HLO created +// during SPMD partitioning pass. +// +// Data on some devices may include padding region, if the base (full) shape +// could not be evenly partitioned. +class PartitionedHlo { + public: + // Return value for ReshardAsWindowedInput which describes the resharded HLO, + // the window for the user on the shard, and if necessary, the dynamic slice + // offsets to be applied to the output of the op being sharded. + struct WindowedInputShardReturnValue { + HloInstruction* sharded_input; + Window shard_window; + absl::optional> dynamic_slice_index_on_output; + }; + // A cache for resharding each partitioned HLO. + struct ReshardCache { + struct PerHloCache { + std::vector> reshard_cache; + std::vector< + std::tuple> + window_reshard_cache; + }; + std::unordered_map per_hlo_cache; + }; + struct PartitioningState { + SpmdBuilder* b; + HloModule* module; + int64 num_replicas; + HloInstruction* partition_id; + SPMDCollectiveOpsCreator collective_ops_creator; + int64* next_channel_id; + ReshardCache* reshard_cache; + }; + PartitionedHlo(HloInstruction* hlo, Shape base_shape, PartitioningState state) + : hlo_(hlo), base_shape_(base_shape), state_(std::move(state)) { + CHECK(hlo->has_sharding()) + << "PartitionedHlo is missing sharding:" << hlo->ToString(); + // If the tuple shape instruction does not have a tuple sharding, reassign + // to use the tuple sharding. Reshard() implementation assumes this. + if (hlo_->shape().IsTuple() && !hlo_->sharding().IsTuple()) { + hlo_->set_sharding( + hlo_->sharding().GetTupleSharding(hlo_->shape()).ValueOrDie()); + } + } + + // Reshards the current SPMD instruction to a new sharding. Could only modify + // the reshard cache. + PartitionedHlo Reshard(const HloSharding& target); + + // Pads the garbage area of the output with the provided value. + PartitionedHlo PadWithValue(HloInstruction* pad_value) const; + + // Returns the SPMD instruction. + HloInstruction* hlo() const { return hlo_; } + + // Returns the sharding of the SPMD instruction. + const HloSharding& sharding() const { return hlo_->sharding(); } + + // Original full shape of the data. + const Shape& base_shape() const { return base_shape_; } + + int64 NewChannel() const { return (*state_.next_channel_id)++; } + + // Reshards the HLO to a usable partitioned input for a windowed user. Could + // only modify the reshard cache. + absl::optional ReshardAsWindowedInput( + const Window& window, const HloSharding& target, + HloInstruction* pad_value, bool mask_invalid_region = true); + + private: + // Same as Reshard except that it does not explicitly modify the reshard + // cache, although it would indirectly modify by calling Replicate(). 
+ PartitionedHlo ReshardNoCache(const HloSharding& target); + + // Helper function to replicate the data on all devices. Could only modify + // the reshard cache. + PartitionedHlo Replicate(); + + // Helper function to broadcast data from a single device to all devices. + PartitionedHlo Broadcast() const; + + // Helper function to reshard the tensor using AllToAll (instead of the + // default of Replicate followed by Slice). + PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; + + // Helper function to reshard the tensor using CollectivePermute. + PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; + + // SPMD instruction. + HloInstruction* hlo_; + + // The original shape of the data before SPMD transformation is applied. + Shape base_shape_; + + PartitioningState state_; +}; + +struct DotGeneralDimsMapping { + // The dimension numbers for the operands and output corresponding to a + // logical dimension (e.g., batch, contracting, non-contracting). If an + // operand or the output doesn't have the logical dimension, it is set to + // -1. + struct DimsMapping { + int64 lhs; + int64 rhs; + int64 output; + }; + std::vector batch_dims; + std::vector contracting_dims; + std::vector lhs_non_contracting_dims; + std::vector rhs_non_contracting_dims; +}; + +class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { + public: + SpmdPartitioningVisitor( + HloComputation* computation, int64 num_partitions, int64 num_replicas, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdLogger* logger, + SpmdPartitionerOptions options, SpmdPartitioner* partitioner); + + Status DefaultAction(HloInstruction* hlo) override; + Status HandleAllReduce(HloInstruction* hlo) override; + Status HandleBroadcast(HloInstruction* hlo) override; + Status HandleConstant(HloInstruction* hlo) override; + Status HandleCustomCall(HloInstruction* hlo) override; + Status HandleDot(HloInstruction* hlo) override; + Status HandleDynamicSlice(HloInstruction* hlo) override; + Status HandleDynamicUpdateSlice(HloInstruction* hlo) override; + Status HandleGather(HloInstruction* hlo) override; + Status HandleGetTupleElement(HloInstruction* hlo) override; + Status HandleInfeed(HloInstruction* hlo) override; + Status HandleOutfeed(HloInstruction* hlo) override; + Status HandlePad(HloInstruction* hlo) override; + Status HandleParameter(HloInstruction* hlo) override; + Status HandleReduce(HloInstruction* hlo) override; + Status HandleReverse(HloInstruction* hlo) override; + Status HandleWhile(HloInstruction* hlo) override; + Status HandleConditional(HloInstruction* hlo) override; + Status HandleReduceWindow(HloInstruction* hlo) override; + Status HandleSelectAndScatter(HloInstruction* hlo) override; + Status HandleTuple(HloInstruction* hlo) override; + Status HandleRng(HloInstruction* hlo) override; + Status HandleConvolution(HloInstruction* hlo) override; + Status HandleConcatenate(HloInstruction* hlo) override; + Status HandleScatter(HloInstruction* hlo) override; + Status HandleSlice(HloInstruction* hlo) override; + Status HandleSort(HloInstruction* hlo) override; + Status HandleTranspose(HloInstruction* hlo) override; + Status HandleReshape(HloInstruction* hlo) override; + Status HandleIota(HloInstruction* hlo) override; + Status HandlePartitionId(HloInstruction* hlo) override; + + // Handles convolution where both LHS and RHS operands are tiled. 
+ Status HandleConvolutionTiledLhsAndRhs(HloInstruction* hlo); + + // Implementation of dot partitioning given DotGeneralDimsMapping. + Status HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot); + + // Common handle for elementwise HLOs. + Status HandleElementwise(HloInstruction* hlo); + + // Common handle for HLOs that runs on a single device. + Status HandleSingleDevice(const HloInstruction* hlo); + + // Returns the PartitionedHlo that corresponds to the original hlo. + PartitionedHlo& GetPartitionedHlo(const HloInstruction* hlo) { + CHECK_EQ(partitioned_instructions_.count(hlo), 1); + return partitioned_instructions_.find(hlo)->second; + } + + // Sets the PartitionedHlo for the original hlo. + void SetPartitionedHlo(const HloInstruction* hlo, + const PartitionedHlo& partitioned_hlo) { + CHECK_EQ(partitioned_instructions_.count(hlo), 0); + partitioned_instructions_.emplace(hlo, partitioned_hlo); + changed_ = true; + } + + // Convenient wrapper that creates PartitionedHlo from the result of the func + // and maps it to the given original hlo. + void SetPartitionedHlo(const HloInstruction* hlo, + const std::function& func) { + HloInstruction* new_hlo = func(); + new_hlo->set_sharding(hlo->sharding()); + new_hlo->set_metadata(hlo->metadata()); + SetPartitionedHlo( + hlo, PartitionedHlo(new_hlo, hlo->shape(), MakePartitioningState())); + changed_ = true; + } + + int64 NewChannel() { return (*next_channel_id_)++; } + + PartitionedHlo::PartitioningState MakePartitioningState() { + PartitionedHlo::PartitioningState state; + state.b = &b_; + state.module = module_; + state.num_replicas = num_replicas_; + state.partition_id = partition_id_; + state.collective_ops_creator = collective_ops_creator_; + state.next_channel_id = next_channel_id_; + state.reshard_cache = &reshard_cache_; + return state; + } + + SpmdBuilder* builder() { return &b_; } + + StatusOr DoPartition(HloComputation* computation, + const HloSharding& root_sharding); + + private: + Status Preprocess(HloInstruction* hlo) override; + Status Postprocess(HloInstruction* hlo) override; + + // Performs code motion for windowed dot-general loops in + // windowed_dot_general_loops_. Invoked after the visitor finishes traversing + // the graph. + Status DoCodeMotionForWindowedDotGeneralLoops(HloComputation* computation); + + bool changed_; + HloModule* module_; + int64 num_partitions_; + int64 num_replicas_; + + SPMDCollectiveOpsCreator collective_ops_creator_; + + // Tracks the next channel id to use for cross-partition all-reduce. + int64* next_channel_id_; + SpmdBuilder b_; + + HloInstruction* partition_id_; + + PartitionedHlo::ReshardCache reshard_cache_; + + // Mapping from the instruction in the original computation to the new SPMD + // partitioned instruction. + ConstHloInstructionMap partitioned_instructions_; + + // Information about a loop created for windowed dot-general. Used when + // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor + // finishes traversing the graph. 
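+  // windowed_operand records which dot operand (0 = LHS, 1 = RHS) is windowed
+  // through the loop; the two booleans indicate whether the windowing is over
+  // contracting or batch dimensions, which determines whether input sinking
+  // or user code motion is applied by
+  // DoCodeMotionForWindowedDotGeneralLoops().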
+ struct WindowedDotGeneralLoop { + HloInstruction* while_loop; + int64 windowed_operand; + bool windowed_in_contracting_dims; + bool windowed_in_batch_dims; + }; + std::vector windowed_dot_general_loops_; + + HloInstruction* visiting_hlo_; + SpmdLogger* logger_; + const SpmdPartitionerOptions options_; + SpmdPartitioner* partitioner_; +}; + +} // namespace spmd +} // namespace xla +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_H_ diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc new file mode 100644 index 00000000000..ca1afc816b0 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -0,0 +1,3215 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" + +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/service/hlo_pass_pipeline.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace spmd { +namespace { + +using ::testing::_; +using ::testing::AllOf; +namespace op = xla::testing::opcode_matchers; + +class SpmdPartitioningTest : public HloTestBase { + public: + StatusOr> PartitionComputation( + const char* hlo_module, int64 num_devices, + bool conv_halo_exchange_always_on_lhs = true) { + // Some tests (BackpropFilter convs) set this flag false to test two + // different paths of the implementation. 
+ SpmdPartitionerOptions options; + options.conv_halo_exchange_always_on_lhs = conv_halo_exchange_always_on_lhs; + options.allow_module_signature_change = true; + + TF_ASSIGN_OR_RETURN(auto module, ParseAndReturnVerifiedModule( + hlo_module, GetModuleConfigForTest())); + HloPassPipeline pass("spmd-partitioning"); + pass.AddPass(/*layout_sensitive=*/false, + /*allow_mixed_precision=*/false); + pass.AddPass(num_devices, /*num_replicas=*/1, options); + pass.AddPass(/*layout_sensitive=*/false, + /*allow_mixed_precision=*/false); + TF_RETURN_IF_ERROR(pass.Run(module.get()).status()); + return StatusOr>(std::move(module)); + } +}; + +TEST_F(SpmdPartitioningTest, InvalidSharding) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + token0 = token[] after-all(), sharding={maximal device=0} + infeed = (f32[8,2]{1,0}, token[]) infeed(token0), + sharding={{devices=[2,1]0,1}, {maximal device=0}} + ROOT infeed.data = f32[8,2]{1,0} get-tuple-element(infeed), index=0, + sharding={maximal device=0} +})"; + auto module_status = PartitionComputation(hlo_string, /*num_devices=*/4); + EXPECT_FALSE(module_status.status().ok()); + EXPECT_THAT(module_status.status().ToString(), + ::testing::HasSubstr( + "only supports tile sharding that includes all partitions")); +} + +TEST_F(SpmdPartitioningTest, SingleDeviceToReplicated) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %constant = s32[2,3]{1,0} constant({{1,1,1},{1,1,1}}), + sharding={maximal device=0} + ROOT %copy = s32[2,3]{1,0} copy(%constant), sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Copy(op::AllReduce( + op::Select(op::Broadcast(op::Compare()), + op::Constant(), op::Broadcast()))), + op::Shape("s32[2,3]"))); +} + +TEST_F(SpmdPartitioningTest, SingleDeviceToSingleDevice) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %constant = s32[2,3]{1,0} constant({{1,1,1},{1,1,1}}), + sharding={maximal device=0} + ROOT %copy = s32[2,3]{1,0} copy(%constant), sharding={maximal device=1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + HloInstruction* root = module->entry_computation()->root_instruction(); + VLOG(1) << module->ToString(); + EXPECT_THAT(root, op::Copy(AllOf(op::Copy(op::AllReduce(op::Select( + op::Broadcast(op::Compare()), + op::Constant(), op::Broadcast()))), + op::Shape("s32[2,3]")))); +} + +TEST_F(SpmdPartitioningTest, SingleDeviceToTiled) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %constant = s32[2,3]{1,0} constant({{1,1,1},{1,1,1}}), + sharding={maximal device=0} + ROOT %copy = s32[2,3]{1,0} copy(%constant), + sharding={devices=[2,1]1,0} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf( + op::Copy(op::DynamicSlice( + op::AllReduce(op::Select( + op::Broadcast(op::Compare(op::PartitionId(), op::Constant())), + op::Constant(), op::Broadcast())), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), + op::Constant())), + op::Constant())), + op::Shape("s32[1,3]"))); +} + +TEST_F(SpmdPartitioningTest, TiledToReplicated) { + const char* const hlo_string = R"( +HloModule module + 
+ENTRY entry { + %constant = s32[2,3]{1,0} constant({{1,1,1},{1,1,1}}), + sharding={devices=[2,1]0,1} + ROOT %copy = s32[2,3]{1,0} copy(%constant), sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + op::Copy(op::AllReduce(AllOf( + op::DynamicUpdateSlice( + op::Broadcast(), AllOf(op::Constant(), op::Shape("s32[1,3]")), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), + op::Constant())), + op::Constant()), + op::Shape("s32[2,3]"))))); +} + +TEST_F(SpmdPartitioningTest, TiledToSingleDevice) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %constant = s32[2,3]{1,0} constant({{1,1,1},{1,1,1}}), + sharding={devices=[2,1]0,1} + ROOT %copy = s32[2,3]{1,0} copy(%constant), sharding={maximal device=0} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + op::Copy(op::Copy(op::AllReduce(AllOf( + op::DynamicUpdateSlice( + op::Broadcast(), AllOf(op::Constant(), op::Shape("s32[1,3]")), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), + op::Constant())), + op::Constant()), + op::Shape("s32[2,3]")))))); +} + +TEST_F(SpmdPartitioningTest, TiledToTiledEven) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param= s32[8,2]{1,0} parameter(0), sharding={devices=[2,1]0,1} + ROOT %copy = s32[8,2]{1,0} copy(%param), sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Copy(op::Reshape(op::Transpose(op::AllToAll(AllOf( + op::Reshape(op::Parameter()), op::Shape("s32[4,2,1]")))))), + op::Shape("s32[8,1]"))); +} + +TEST_F(SpmdPartitioningTest, TiledToTiledUneven) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param= f32[7,31,128]{2,1,0} parameter(0), sharding={devices=[1,2,1]0,1} + ROOT %copy = f32[7,31,128]{2,1,0} copy(%param), sharding={devices=[2,1,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Copy(op::Slice(op::Reshape(AllOf(op::Transpose(op::AllToAll( + op::Reshape(AllOf(op::Pad(), op::Shape("f32[8,16,128]"))))))))))); +} + +TEST_F(SpmdPartitioningTest, GetTupleElementSwapDevice) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param.0 = (f32[2,3]{1,0}, u32[]) parameter(0), + sharding={{maximal device=1}, {maximal device=1}} + %gte.0 = f32[2,3]{1,0} get-tuple-element(%param.0), index=0, + sharding={maximal device=0} + %gte.1 = u32[] get-tuple-element(%param.0), index=1, + sharding={maximal device=0} + ROOT %tuple = (f32[2,3]{1,0}, u32[]) tuple(%gte.0, %gte.1), + sharding={{maximal device=0},{maximal device=0}} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::Tuple()); + + EXPECT_THAT(root->operand(0), + op::Copy(op::AllReduce(op::Select( + 
op::Broadcast(op::Compare(op::PartitionId(), op::Constant())), + op::GetTupleElement(op::Parameter()), op::Broadcast())))); + EXPECT_THAT(root->operand(1), + op::Copy(op::AllReduce(op::Select( + op::Broadcast(op::Compare(op::PartitionId(), op::Constant())), + op::GetTupleElement(op::Parameter()), op::Broadcast())))); +} + +TEST_F(SpmdPartitioningTest, GetTupleElementTiled) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + param.0 = (f32[2,3]{1,0}, u32[2,3]{1,0}) parameter(0), + sharding={{replicated}, {replicated}} + gte.0 = f32[2,3]{1,0} get-tuple-element(param.0), index=0, + sharding={devices=[2,1]0,1} + gte.1 = u32[2,3]{1,0} get-tuple-element(param.0), index=1, + sharding={devices=[2,1]0,1} + ROOT %tuple = (f32[2,3]{1,0}, u32[2,3]{1,0}) tuple(gte.0, gte.1), + sharding={{devices=[2,1]0,1},{devices=[2,1]0,1}} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::Tuple()); + + auto offset = op::Reshape( + op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + + EXPECT_THAT(root->operand(0), + op::DynamicSlice(op::GetTupleElement(op::Parameter()), offset, + op::Constant())); + EXPECT_THAT(root->operand(1), + op::DynamicSlice(op::GetTupleElement(op::Parameter()), offset, + op::Constant())); +} + +TEST_F(SpmdPartitioningTest, TiledInfeed) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + token0 = token[] after-all(), sharding={maximal device=0} + infeed = (f32[8,2]{1,0}, token[]) infeed(token0), + sharding={{devices=[2,1]0,1}, {maximal device=0}} + ROOT infeed.data = f32[8,2]{1,0} get-tuple-element(infeed), index=0, + sharding={maximal device=0} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, op::Copy(op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(), + op::GetTupleElement( + AllOf(op::Infeed(), op::Shape("(f32[4,2]{1,0}, token[])"))), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), + op::Constant())), + op::Constant())))); +} + +TEST_F(SpmdPartitioningTest, UnevenTiledInfeed) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + token0 = token[] after-all(), sharding={maximal device=0} + infeed = (f32[9,2]{1,0}, token[]) infeed(token0), + sharding={{devices=[2,1]0,1}, {maximal device=0}} + ROOT infeed.data = f32[9,2]{1,0} get-tuple-element(infeed), index=0, + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, AllOf(op::Shape("f32[5,2]"), op::GetTupleElement(op::Conditional( + op::Convert(op::PartitionId()), + op::AfterAll(), op::AfterAll())))); + EXPECT_THAT( + root->operand(0)->called_computations()[0]->root_instruction(), + AllOf(op::Shape("(f32[5,2], token[])"), op::Infeed(op::Parameter()))); + auto second_infeed = + AllOf(op::Shape("(f32[4,2], token[])"), op::Infeed(op::Parameter())); + EXPECT_THAT(root->operand(0)->called_computations()[1]->root_instruction(), + AllOf(op::Shape("(f32[5,2], token[])"), + op::Tuple(op::Pad(op::GetTupleElement(second_infeed), + op::Constant()), + op::GetTupleElement(second_infeed)))); +} + 
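+// The uneven-tiling case above exercises the padding path: the partition
+// whose shard extends past the base shape receives a padded infeed selected
+// by a conditional on the partition id, which is why the matchers expect
+// op::Conditional(op::Convert(op::PartitionId()), ...).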
+TEST_F(SpmdPartitioningTest, UnevenTiledTupleInfeed) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + token0 = token[] after-all(), sharding={maximal device=0} + infeed = ((f32[9,2]{1,0}, f32[2]{0}), token[]) infeed(token0), + sharding={{devices=[2,1]0,1}, {replicated}, {maximal device=0}} + ROOT infeed.data = (f32[9,2]{1,0}, f32[2]{0}) get-tuple-element(infeed), + index=0, sharding={{devices=[2,1]0,1}, {replicated}} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("(f32[5,2], f32[2])"), + op::GetTupleElement(op::Conditional( + op::Convert(op::PartitionId()), op::AfterAll(), + op::AfterAll())))); + EXPECT_THAT(root->operand(0)->called_computations()[0]->root_instruction(), + AllOf(op::Shape("((f32[5,2], f32[2]), token[])"), + op::Infeed(op::Parameter()))); + auto second_infeed = AllOf(op::Shape("((f32[4,2], f32[2]), token[])"), + op::Infeed(op::Parameter())); + EXPECT_THAT( + root->operand(0)->called_computations()[1]->root_instruction(), + AllOf(op::Shape("((f32[5,2], f32[2]), token[])"), + op::Tuple(op::Tuple(op::Pad(op::GetTupleElement( + op::GetTupleElement(second_infeed)), + op::Constant()), + op::GetTupleElement( + op::GetTupleElement(second_infeed))), + op::GetTupleElement(second_infeed)))); +} + +TEST_F(SpmdPartitioningTest, TiledToReplicatedReduce) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + constant = f32[3,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1}}), + sharding={devices=[2,1]0,1} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT reduce = f32[] reduce(constant, constant.1), dimensions={0,1}, + to_apply=sum, sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + op::AllReduce(op::Reduce( + op::Select( + op::Compare(op::Add(op::Iota(), op::Broadcast(op::Reshape())), + op::Broadcast(op::Constant())), + AllOf(op::Shape("f32[2,3]{1,0}"), + op::DynamicSlice(op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant())), + op::Broadcast(op::Constant())), + op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, TiledElementwise) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[3,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1}}), + sharding={devices=[2,1]0,1} + constant.1 = f32[3,3]{1,0} constant({{2,2,2},{2,2,2},{2,2,2}}), + sharding={replicated} + multiply = f32[3,3]{1,0} multiply(constant, constant.1), + sharding={devices=[2,1]0,1} + ROOT add = f32[3,3]{1,0} add(multiply, constant.1), + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf( + op::Shape("f32[2,3]{1,0}"), + op::Add(op::Multiply( + op::DynamicSlice(op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant()), + op::DynamicSlice(op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant())), + op::DynamicSlice(op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant())))); +} + 
+TEST_F(SpmdPartitioningTest, TiledAllReduce) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + parameter = f32[3,3]{1,0} parameter(0), sharding={devices=[2,1]0,1} + ROOT all-reduce = f32[3,3]{1,0} all-reduce(parameter), to_apply=sum, + replica_groups={}, sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, AllOf(op::Shape("f32[2,3]{1,0}"), op::AllReduce(op::Parameter(0)))); +} + +TEST_F(SpmdPartitioningTest, BroadcastOnlyNewDimsSharded) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[4,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1},{1,1,1}}), + sharding={replicated} + ROOT broadcast = f32[3,4,3]{2,1,0} broadcast(constant), dimensions={1,2}, + sharding={devices=[2,1,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,4,3]{2,1,0}"), + op::Broadcast(op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, BroadcastOnlyOldDimsSharded) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[4,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1},{1,1,1}}), + sharding={replicated} + ROOT broadcast = f32[4,4,3]{2,1,0} broadcast(constant), dimensions={1,2}, + sharding={devices=[1,2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[4,2,3]{2,1,0}"), + op::Broadcast(op::DynamicSlice( + op::Constant(), op::Reshape(), op::Constant())))); +} + +TEST_F(SpmdPartitioningTest, BroadcastBothOldAndNewDimsSharded) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[4,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1},{1,1,1}}), + sharding={replicated} + ROOT broadcast = f32[4,4,3]{2,1,0} broadcast(constant), dimensions={1,2}, + sharding={devices=[2,2,1]0,1,2,3} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Shape("f32[2,2,3]{2,1,0}"), + op::Broadcast(AllOf(op::Shape("f32[2,3]{1,0}"), + op::DynamicSlice(op::Constant(), op::Reshape(), + op::Constant()))))); +} + +TEST_F(SpmdPartitioningTest, BroadcastPropagateTiledSharding) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[4,3]{1,0} constant({{1,1,1},{1,4,1},{1,3,1},{1,2,1}}), + sharding={devices=[2,1]0,1} + ROOT broadcast = f32[4,4,3]{2,1,0} broadcast(constant), dimensions={1,2}, + sharding={devices=[1,2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[4,2,3]{2,1,0}"), + op::Broadcast(op::DynamicSlice( + op::Constant(), op::Reshape(), op::Constant())))); +} + +TEST_F(SpmdPartitioningTest, OutfeedSingleDevice) { + 
const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + token.0 = token[] after-all() + data = f32[1024]{0} parameter(0), sharding={maximal device=0} + outfeed = token[] outfeed(data, token.0), sharding={maximal device=0} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("token[]"), + op::Conditional( + op::Compare(op::PartitionId(), op::Constant()), + op::Tuple(op::Parameter(0), op::AfterAll()), + op::Tuple(op::Parameter(0), op::AfterAll())))); + + HloInstruction* root_b0 = root->branch_computation(0)->root_instruction(); + EXPECT_THAT(root_b0, + AllOf(op::Shape("token[]"), + op::Outfeed(op::GetTupleElement(op::Parameter(), 0), + op::GetTupleElement(op::Parameter(), 1)))); + + HloInstruction* root_b1 = root->branch_computation(1)->root_instruction(); + EXPECT_THAT(root_b1, AllOf(op::Shape("token[]"), op::AfterAll())); +} + +TEST_F(SpmdPartitioningTest, ReduceWindowReplicatedInput) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + constant = f32[6,2]{1,0} constant({{1,1},{1,4},{2,1},{3,1},{1,2},{2,2}}), + sharding={replicated} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT reduce-window = f32[3,2]{1,0} reduce-window(constant, constant.1), + window={size=3x1 stride=2x1 pad=1_0x0_0}, to_apply=sum, + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Shape("f32[2,2]{1,0}"), + op::ReduceWindow( + op::DynamicSlice(AllOf(op::Shape("f32[9,2]{1,0}"), + op::Pad(op::Constant(), op::Constant())), + op::Multiply(op::Reshape(), op::Constant()), + op::Constant()), + op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, ReduceWindowTiledNegativeLeftHalo) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + constant = f32[6,2]{1,0} constant({{1,1},{1,4},{2,1},{3,1},{1,2},{2,2}}), + sharding={devices=[2,1]0,1} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT %reduce-window = f32[3,2]{1,0} reduce-window(%constant, %constant.1), + window={size=3x1 stride=2x1 pad=0_1x0_0}, to_apply=sum, + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + + auto sharded_input = + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant()); + auto right_halo = AllOf(op::Shape("f32[2,2]{1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto pre_masking = op::DynamicSlice( + AllOf( + op::Shape("f32[6,2]{1,0}"), + op::Pad(op::Concatenate(sharded_input, right_halo), op::Constant())), + op::Reshape(), op::Constant()); + auto index_in_padded = op::Add( + op::Iota(), op::Broadcast(op::Multiply(op::Reshape(), op::Constant()))); + auto masked = + op::Select(op::Compare(index_in_padded, op::Broadcast(op::Constant())), + pre_masking, op::Broadcast(op::Constant())); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,2]{1,0}"), + op::ReduceWindow(masked, 
op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, ReduceWindowTiledOneSideUnequalHalo) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + constant = f32[9,2]{1,0} constant( + {{1,1},{1,4},{2,1},{3,1},{1,2},{2,2},{4,1},{1,2},{2,1}}), + sharding={devices=[3,1]0,1,2} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT reduce-window = f32[5,2]{1,0} reduce-window(constant, constant.1), + window={size=3x1 stride=2x1 pad=1_1x0_0}, to_apply=sum, + sharding={devices=[3,1]0,1,2} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/3)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + + auto sharded_input = + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant()); + auto right_halo = AllOf(op::Shape("f32[2,2]{1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto pre_masking = op::DynamicSlice( + AllOf( + op::Shape("f32[7,2]{1,0}"), + op::Pad(op::Concatenate(sharded_input, right_halo), op::Constant())), + op::Reshape(), op::Constant()); + auto index_in_padded = op::Add( + op::Iota(), op::Broadcast(op::Multiply(op::Reshape(), op::Constant()))); + auto masked = op::Select( + op::And(op::Compare(index_in_padded, op::Broadcast(op::Constant())), + op::Compare(index_in_padded, op::Broadcast(op::Constant()))), + pre_masking, op::Broadcast(op::Constant())); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,2]{1,0}"), + op::ReduceWindow(masked, op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, ReduceWindowTiledTwoSideHalo) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + constant = f32[4,2]{1,0} constant({{1,1},{1,4},{2,1},{3,1}}), + sharding={devices=[2,1]0,1} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT reduce-window = f32[2,2]{1,0} reduce-window(constant, constant.1), + window={size=5x1 stride=3x1 pad=2_2x0_0}, to_apply=sum, + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + + auto sharded_input = + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant()); + auto left_halo = AllOf(op::Shape("f32[1,2]{1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto right_halo = AllOf(op::Shape("f32[1,2]{1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto pre_masking = AllOf( + op::Shape("f32[5,2]{1,0}"), + op::DynamicSlice( + AllOf(op::Shape("f32[6,2]{1,0}"), + op::Pad(op::Concatenate(left_halo, sharded_input, right_halo), + op::Constant())), + op::Reshape(), op::Constant())); + auto index_in_padded = op::Add( + op::Iota(), op::Broadcast(op::Multiply(op::Reshape(), op::Constant()))); + auto masked = op::Select( + op::And(op::Compare(index_in_padded, op::Broadcast(op::Constant())), + op::Compare(index_in_padded, op::Broadcast(op::Constant()))), + pre_masking, op::Broadcast(op::Constant())); + EXPECT_THAT(root, AllOf(op::Shape("f32[1,2]{1,0}"), + op::ReduceWindow(masked, op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, ReduceWindowTiled2D) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + token0 = 
token[] after-all(), sharding={maximal device=0} + infeed = (f32[4,4,2,2]{3,2,1,0}, token[]) infeed(token0), + sharding={{devices=[2,2,1,1]0,1,2,3}, {maximal device=0}} + infeed.data = f32[4,4,2,2]{3,2,1,0} get-tuple-element(infeed), index=0, + sharding={devices=[2,2,1,1]0,1,2,3} + constant = f32[] constant(0), sharding={replicated} + ROOT reduce-window = f32[2,2,2,2]{3,2,1,0} reduce-window(infeed.data, constant), + window={size=5x5x1x1 stride=3x3x1x1 pad=2_2x2_2x0_0x0_0}, to_apply=sum, + sharding={devices=[2,2,1,1]0,1,2,3} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + + auto sharded_input = AllOf(op::Shape("f32[2,2,2,2]{3,2,1,0}"), + op::GetTupleElement(op::Infeed())); + auto dim0_left_halo = AllOf(op::Shape("f32[1,2,2,2]{3,2,1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto dim0_right_halo = AllOf(op::Shape("f32[1,2,2,2]{3,2,1,0}"), + op::CollectivePermute(op::Slice(sharded_input))); + auto dim0_pre_masking = op::DynamicSlice( + AllOf(op::Shape("f32[6,2,2,2]{3,2,1,0}"), + op::Pad( + op::Concatenate(dim0_left_halo, sharded_input, dim0_right_halo), + op::Constant())), + op::Reshape(), op::Constant(), op::Constant(), op::Constant()); + auto dim0_index_in_padded = op::Add( + op::Iota(), op::Broadcast(op::Multiply(op::Reshape(), op::Constant()))); + auto dim0_masked = op::Select( + op::And(op::Compare(dim0_index_in_padded, op::Broadcast(op::Constant())), + op::Compare(dim0_index_in_padded, op::Broadcast(op::Constant()))), + dim0_pre_masking, op::Broadcast(op::Constant())); + auto dim0_resharded = AllOf(op::Shape("f32[5,2,2,2]{3,2,1,0}"), dim0_masked); + auto dim1_left_halo = AllOf(op::Shape("f32[5,1,2,2]{3,2,1,0}"), + op::CollectivePermute(op::Slice(dim0_resharded))); + auto dim1_right_halo = + AllOf(op::Shape("f32[5,1,2,2]{3,2,1,0}"), + op::CollectivePermute(op::Slice(dim0_resharded))); + auto dim1_pre_masking = op::DynamicSlice( + AllOf(op::Shape("f32[5,6,2,2]{3,2,1,0}"), + op::Pad(op::Concatenate(dim1_left_halo, dim0_resharded, + dim1_right_halo), + op::Constant())), + op::Constant(), op::Reshape(), op::Constant(), op::Constant()); + auto dim1_index_in_padded = op::Add( + op::Iota(), op::Broadcast(op::Multiply(op::Reshape(), op::Constant()))); + auto dim1_masked = op::Select( + op::And(op::Compare(dim1_index_in_padded, op::Broadcast(op::Constant())), + op::Compare(dim1_index_in_padded, op::Broadcast(op::Constant()))), + dim1_pre_masking, op::Broadcast(op::Constant())); + auto dim1_resharded = AllOf(op::Shape("f32[5,5,2,2]{3,2,1,0}"), dim1_masked); + EXPECT_THAT(root, AllOf(op::Shape("f32[1,1,2,2]{3,2,1,0}"), + op::ReduceWindow(dim1_resharded, op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsReplicated) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,224,224,3] parameter(0) + %lhs.copy = f32[128,224,224,3] copy(f32[128,224,224,3] %lhs), + sharding={devices=[1,2,1,1]0,1} + %rhs = f32[7,7,3,64] parameter(1) + %rhs.copy = f32[7,7,3,64] copy(f32[7,7,3,64] %rhs), + sharding={replicated} + ROOT %conv = f32[128,112,112,64] convolution( + f32[128,224,224,3] %lhs.copy, + f32[7,7,3,64] %rhs.copy), + window={size=7x7 stride=2x2 pad=3_3x3_3}, + dim_labels=b01f_01io->b01f, + sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = 
module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,112,224,3]")); + auto rhs = AllOf(op::Copy(op::Parameter()), op::Shape("f32[7,7,3,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,3,224,3]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,2,224,3]")); + EXPECT_THAT(root, + AllOf(op::Convolution( + op::Select(op::And(), + op::Concatenate(left_halo, lhs, right_halo), + op::Broadcast()), + rhs), + op::Shape("f32[128,56,112,64]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsReplicatedNeedReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,224,224,3] parameter(0) + %lhs.copy = f32[128,224,224,3] copy(f32[128,224,224,3] %lhs), + sharding={devices=[2,1,1,1]0,1} + %rhs = f32[7,7,3,64] parameter(1) + %rhs.copy = f32[7,7,3,64] copy(f32[7,7,3,64] %rhs), + sharding={replicated} + ROOT %conv = f32[128,112,112,64] convolution( + f32[128,224,224,3] %lhs.copy, + f32[7,7,3,64] %rhs.copy), + window={size=7x7 stride=2x2 pad=3_3x3_3}, + dim_labels=b01f_01io->b01f, + sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[64,224,224,3]")); + auto all_to_all = + AllOf(op::AllToAll(op::Reshape(lhs)), op::Shape("f32[64,2,112,224,3]")); + auto reshard_lhs = AllOf(op::Reshape(op::Transpose(all_to_all)), + op::Shape("f32[128,112,224,3]")); + + auto rhs = AllOf(op::Copy(op::Parameter()), op::Shape("f32[7,7,3,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(reshard_lhs)), + op::Shape("f32[128,3,224,3]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(reshard_lhs)), + op::Shape("f32[128,2,224,3]")); + EXPECT_THAT( + root, + AllOf(op::Convolution( + op::Select(op::And(), + op::Concatenate(left_halo, reshard_lhs, right_halo), + op::Broadcast()), + rhs), + op::Shape("f32[128,56,112,64]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsReplicatedReordered) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[224,224,3,128] parameter(0) + %lhs.copy = f32[224,224,3,128] copy(%lhs), sharding={devices=[2,1,1,1]0,1} + %rhs = f32[7,7,3,64] parameter(1) + %rhs.copy = f32[7,7,3,64] copy(%rhs), sharding={replicated} + ROOT %conv = f32[128,112,112,64] convolution(%lhs.copy, %rhs.copy), + window={size=7x7 stride=2x2 pad=3_3x3_3}, + dim_labels=01fb_01io->b01f, + sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[112,224,3,128]")); + auto rhs = AllOf(op::Copy(op::Parameter()), op::Shape("f32[7,7,3,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[3,224,3,128]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[2,224,3,128]")); + EXPECT_THAT(root, 
+ AllOf(op::Convolution( + op::Select(op::And(), + op::Concatenate(left_halo, lhs, right_halo), + op::Broadcast()), + rhs), + op::Shape("f32[128,56,112,64]"))); +} + +// (stride * per_shard_window_count) % dilation == 0 +TEST_F(SpmdPartitioningTest, + ConvolutionBaseDilationSameStartPatternLhsTiledRhsReplicated) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,7,7,512] parameter(0) + %lhs.copy = f32[128,7,7,512] copy(%lhs), + sharding={devices=[1,2,1,1]0,1} + %rhs = f32[3,3,512,512] parameter(1) + %rhs.copy = f32[3,3,512,512] copy(%rhs), + sharding={replicated} + ROOT %conv = f32[128,4,4,512] convolution(%lhs.copy, %rhs.copy), + window={size=3x3 stride=4x4 pad=1_1x1_1 lhs_dilate=2x2 rhs_reversal=1x1}, + dim_labels=b01f_01io->b01f, + sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + // There is no halo exchange, and because the last element in the shard is not + // needed (stride == 4), the LHS will be just a slice. + auto sliced_lhs = + AllOf(op::Slice(op::Copy(op::DynamicSlice( + op::Pad(op::Parameter(), op::Constant()), op::Constant(), + op::Reshape(), op::Constant(), op::Constant()))), + op::Shape("f32[128,3,7,512]")); + auto rhs = AllOf(op::Copy(op::Parameter()), op::Shape("f32[3,3,512,512]")); + EXPECT_THAT(root, AllOf(op::Convolution(sliced_lhs, rhs), + op::Shape("f32[128,2,4,512]"))); + EXPECT_EQ(root->window().dimensions(0).padding_low(), 1); + EXPECT_EQ(root->window().dimensions(0).padding_high(), 1); +} + +// (stride * per_shard_window_count) % dilation != 0 but stride == 1 +TEST_F(SpmdPartitioningTest, + ConvolutionBaseDilationStride1LhsTiledRhsReplicated) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,7,7,512] parameter(0) + %lhs.copy = f32[128,7,7,512] copy(%lhs), + sharding={devices=[1,2,1,1]0,1} + %rhs = f32[3,3,512,512] parameter(1) + %rhs.copy = f32[3,3,512,512] copy(%rhs), + sharding={replicated} + ROOT %conv = f32[128,14,14,512] convolution(%lhs.copy, %rhs.copy), + window={size=3x3 pad=1_2x1_2 lhs_dilate=2x2 rhs_reversal=1x1}, + dim_labels=b01f_01io->b01f, + sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice( + op::Pad(op::Parameter(), op::Constant()), op::Constant(), + op::Reshape(), op::Constant(), op::Constant())), + op::Shape("f32[128,4,7,512]")); + auto rhs = AllOf(op::Copy(op::Parameter()), op::Shape("f32[3,3,512,512]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,1,7,512]")); + auto start_window = op::Multiply(op::Reshape(), op::Constant()); + auto start_input_element = op::Divide(start_window, op::Constant()); + auto dynamic_offset_for_padded_concat = op::Subtract( + op::Constant(), op::Subtract(op::Multiply(op::Reshape(), op::Constant()), + start_input_element)); + auto pre_masking = + AllOf(op::Shape("f32[128,5,7,512]"), + op::DynamicSlice( + AllOf(op::Shape("f32[128,6,7,512]"), + op::Pad(op::Concatenate(left_halo, lhs), op::Constant())), + op::Constant(), dynamic_offset_for_padded_concat, + op::Constant(), op::Constant())); + auto masked = op::Select( + op::Compare(op::Add(op::Iota(), op::Broadcast(start_input_element)), + 
op::Broadcast(op::Constant())), + pre_masking, op::Broadcast(op::Constant())); + auto dynamic_offset_on_output = op::Subtract( + start_window, op::Multiply(start_input_element, op::Constant())); + EXPECT_THAT(root, + AllOf(op::DynamicSlice(AllOf(op::Convolution(masked, rhs), + op::Shape("f32[128,8,14,512]")), + op::Constant(), dynamic_offset_on_output, + op::Constant(), op::Constant()), + op::Shape("f32[128,7,14,512]"))); + EXPECT_EQ(root->operand(0)->window().dimensions(0).padding_low(), 1); + EXPECT_EQ(root->operand(0)->window().dimensions(0).padding_high(), 0); +} + +TEST_F(SpmdPartitioningTest, SelectAndScatterNoOverlap) { + const char* const hlo_string = R"( +HloModule module + +ge { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT compare = pred[] compare(a, b), direction=GE +} + +sum { + c = f32[] parameter(0) + d = f32[] parameter(1) + ROOT add = f32[] add(c, d) +} + +ENTRY entry { + %param = f32[11,4]{1,0} parameter(0) + %param.copy = f32[11,4] copy(%param), + sharding={devices=[4,1]0,1,2,3} + constant = f32[4,2]{1,0} constant({{1,2},{3,4},{1,0},{2,8}}), + sharding={devices=[4,1]0,1,2,3} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT select-and-scatter = f32[11,4]{1,0} select-and-scatter(param.copy, + constant, constant.1), window={size=3x2 stride=3x2 pad=0_1x0_0}, + select=ge, scatter=sum, sharding={devices=[4,1]0,1,2,3} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + auto source = + AllOf(op::Shape("f32[1,2]{1,0}"), + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant())); + auto masked_data = AllOf( + op::Shape("f32[3,4]{1,0}"), + op::Select( + op::Compare(op::Add(op::Iota(), op::Broadcast(op::Multiply( + op::Reshape(), op::Constant()))), + op::Broadcast(op::Constant())), + op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Reshape(), op::Constant())), + op::Broadcast(op::Constant()))); + + EXPECT_THAT(root, + AllOf(op::SelectAndScatter(masked_data, source, op::Constant()), + op::Shape("f32[3,4]{1,0}"))); + EXPECT_EQ(root->window().dimensions(0).padding_low(), 0); + EXPECT_EQ(root->window().dimensions(0).padding_high(), 0); +} + +TEST_F(SpmdPartitioningTest, SelectAndScatterNoOverlapReshard) { + const char* const hlo_string = R"( +HloModule module + +ge { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT compare = pred[] compare(a, b), direction=GE +} + +sum { + c = f32[] parameter(0) + d = f32[] parameter(1) + ROOT add = f32[] add(c, d) +} + +ENTRY entry { + %param = f32[11,4]{1,0} parameter(0) + %param.copy = f32[11,4] copy(%param), + sharding={devices=[1,4]0,1,2,3} + constant = f32[4,2]{1,0} constant({{1,2},{3,4},{1,0},{2,8}}), + sharding={devices=[4,1]0,1,2,3} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT select-and-scatter = f32[11,4]{1,0} select-and-scatter(param.copy, + constant, constant.1), window={size=3x2 stride=3x2 pad=0_1x0_0}, + select=ge, scatter=sum, sharding={devices=[4,1]0,1,2,3} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + auto source = + AllOf(op::Shape("f32[1,2]{1,0}"), + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant())); + auto operand = AllOf(op::Copy(op::DynamicSlice( + op::Parameter(0), op::Constant(), op::Reshape())), + op::Shape("f32[11,1]")); + auto 
reshard_operand = op::Reshape(op::Transpose( + op::AllToAll(op::Reshape(op::Pad(operand, op::Constant()))))); + auto masked_data = AllOf( + op::Shape("f32[3,4]{1,0}"), + op::Select( + op::Compare(op::Add(op::Iota(), op::Broadcast(op::Multiply( + op::Reshape(), op::Constant()))), + op::Broadcast(op::Constant())), + reshard_operand, op::Broadcast(op::Constant()))); + + EXPECT_THAT(root, + AllOf(op::SelectAndScatter(masked_data, source, op::Constant()), + op::Shape("f32[3,4]{1,0}"))); + EXPECT_EQ(root->window().dimensions(0).padding_low(), 0); + EXPECT_EQ(root->window().dimensions(0).padding_high(), 0); +} + +TEST_F(SpmdPartitioningTest, SelectAndScatterWithOverlap) { + const char* const hlo_string = R"( +HloModule module + +ge { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT compare = pred[] compare(a, b), direction=GE +} + +sum { + c = f32[] parameter(0) + d = f32[] parameter(1) + ROOT add = f32[] add(c, d) +} + +ENTRY entry { + %param = f32[11,4]{1,0} parameter(0) + %param.copy = f32[11,4] copy(%param), + sharding={devices=[4,1]0,1,2,3} + constant = f32[6,2]{1,0} constant({{1,2},{3,4},{1,0},{2,8},{6,6},{1,9}}), + sharding={devices=[4,1]0,1,2,3} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT select-and-scatter = f32[11,4]{1,0} select-and-scatter(param.copy, + constant, constant.1), window={size=3x2 stride=2x2 pad=1_1x0_0}, + select=ge, scatter=sum, sharding={devices=[4,1]0,1,2,3} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + + auto source_shard = + AllOf(op::Shape("f32[2,2]{1,0}"), + op::DynamicSlice(op::Pad(), op::Reshape(), op::Constant())); + // Max halo size is the same as the shard size, so slice is not needed. 
+ auto source_left_halo = op::CollectivePermute(source_shard); + auto required_source_shard_start = + op::Divide(op::Multiply(op::Reshape(), op::Constant()), op::Constant()); + auto source_with_halo = op::DynamicSlice( + AllOf(op::Shape("f32[5,2]{1,0}"), + op::Pad(op::Concatenate(source_left_halo, source_shard), + op::Constant())), + op::Subtract(op::Constant(), + op::Subtract(op::Multiply(op::Reshape(), op::Constant()), + required_source_shard_start)), + op::Constant()); + auto masked_source_with_halo = AllOf( + AllOf(op::Shape("f32[3,2]{1,0}")), + op::Select( + op::Compare( + op::Add(op::Iota(), op::Broadcast(required_source_shard_start)), + op::Broadcast(op::Constant())), + source_with_halo, op::Broadcast(op::Constant()))); + + auto data_shard = + AllOf(op::Shape("f32[3,4]{1,0}"), + op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Reshape(), op::Constant()))); + auto data_left_halo = AllOf(op::Shape("f32[2,4]{1,0}"), + op::CollectivePermute(op::Slice(data_shard))); + auto data_right_halo = AllOf(op::Shape("f32[2,4]{1,0}"), + op::CollectivePermute(op::Slice(data_shard))); + auto required_data_start_on_padded = + op::Multiply(required_source_shard_start, op::Constant()); + auto left_halo_size = op::Subtract( + op::Add(op::Multiply(op::Reshape(), op::Constant()), op::Constant()), + required_data_start_on_padded); + auto data_with_halo = + AllOf(op::Shape("f32[7,4]{1,0}"), + op::DynamicSlice( + AllOf(op::Shape("f32[8,4]{1,0}"), + op::Pad(op::Concatenate(data_left_halo, data_shard, + data_right_halo), + op::Constant())), + op::Subtract(op::Constant(), left_halo_size), op::Constant())); + auto index_on_padded = + op::Add(op::Iota(), op::Broadcast(required_data_start_on_padded)); + auto masked_data_with_halo = op::Select( + op::And(op::Compare(index_on_padded, op::Broadcast(op::Constant())), + op::Compare(index_on_padded, op::Broadcast(op::Constant()))), + data_with_halo, op::Broadcast(op::Constant())); + + EXPECT_THAT( + root, AllOf(op::DynamicSlice(op::SelectAndScatter(masked_data_with_halo, + masked_source_with_halo, + op::Constant()), + left_halo_size, op::Constant()), + op::Shape("f32[3,4]{1,0}"))); + EXPECT_EQ(root->operand(0)->window().dimensions(0).padding_low(), 0); + EXPECT_EQ(root->operand(0)->window().dimensions(0).padding_high(), 0); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiled) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,56,56,64] parameter(0) + %lhs.copy = f32[128,56,56,64] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,56,56,256] parameter(1) + %rhs.copy = f32[128,56,56,256] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[1,1,64,256] convolution(%lhs.copy, %rhs.copy), + window={size=56x56}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,64]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,256]")); + + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution(lhs, rhs)), + op::Shape("f32[1,1,64,256]"))); +} + +TEST_F(SpmdPartitioningTest, DotLhsTiledRhsTiledWithReshard) { + const char* const 
hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,56,56,64] parameter(0) + %lhs.copy = f32[128,56,56,64] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,56,56,256] parameter(1) + %rhs.copy = f32[128,56,56,256] copy(%rhs), sharding={devices=[2,1,1,1]0,1} + ROOT %conv = f32[1,1,64,256] convolution(%lhs.copy, %rhs.copy), + window={size=56x56}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,64]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[64,56,56,256]")); + auto all_to_all = + AllOf(op::AllToAll(op::Reshape(lhs)), op::Shape("f32[2,64,28,56,64]")); + auto reshard = AllOf(op::Reshape(op::Transpose(all_to_all))); + + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution(reshard, rhs)), + op::Shape("f32[1,1,64,256]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWithReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,56,56,512] parameter(0) + %lhs.copy = f32[128,56,56,512] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,28,28,64] parameter(1) + %rhs.copy = f32[128,28,28,64] copy(%rhs), sharding={devices=[2,1,1,1]0,1} + ROOT %conv = f32[1,1,512,64] convolution(%lhs.copy, %rhs.copy), + window={size=28x28 pad=0_-1x0_-1 rhs_dilate=2x2}, + dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,512]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[64,28,28,64]")); + auto all_to_all = + AllOf(op::AllToAll(op::Reshape(rhs)), op::Shape("f32[64,2,14,28,64]")); + auto reshard = op::Reshape(op::Transpose(all_to_all)); + + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Convolution(op::Slice(lhs), reshard)), + op::Shape("f32[1,1,512,64]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWithPadding) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,28,28,128] parameter(0) + %lhs.copy = f32[32,28,28,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,28,28,64] parameter(1) + %rhs.copy = f32[32,28,28,64] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[3,3,128,64] convolution(%lhs.copy, %rhs.copy), + window={size=28x28 pad=1_1x1_1}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, + PartitionComputation(hlo_string, /*num_devices=*/2, + /*conv_halo_exchange_always_on_lhs=*/false)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,14,28,128]")); + auto rhs = AllOf( + 
op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,14,28,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(rhs)), + op::Shape("f32[32,1,28,64]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(rhs)), + op::Shape("f32[32,1,28,64]")); + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Convolution( + lhs, AllOf(op::Concatenate(left_halo, rhs, right_halo), + op::Shape("f32[32,16,28,64]")))), + op::Shape("f32[3,3,128,64]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWindowDilate) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,224,224,3] parameter(0) + %lhs.copy = f32[128,224,224,3] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,112,112,64] parameter(1) + %rhs.copy = f32[128,112,112,64] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[7,7,3,64] convolution(%lhs.copy, %rhs.copy), + window={size=112x112 pad=3_2x3_2 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, + PartitionComputation(hlo_string, /*num_devices=*/2, + /*conv_halo_exchange_always_on_lhs=*/false)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,112,224,3]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,56,112,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(rhs)), + op::Shape("f32[128,2,112,64]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(rhs)), + op::Shape("f32[128,2,112,64]")); + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Convolution( + lhs, AllOf(op::Concatenate(left_halo, rhs, right_halo), + op::Shape("f32[128,60,112,64]")))), + op::Shape("f32[7,7,3,64]"))); +} + +TEST_F(SpmdPartitioningTest, + ConvolutionLhsTiledRhsTiledWindowDilateNegativeRhsPadding) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,56,56,256] parameter(0) + %lhs.copy = f32[128,56,56,256] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,28,28,512] parameter(1) + %rhs.copy = f32[128,28,28,512] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[1,1,256,512] convolution(%lhs.copy, %rhs.copy), + window={size=28x28 pad=0_-1x0_-1 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, + PartitionComputation(hlo_string, /*num_devices=*/2, + /*conv_halo_exchange_always_on_lhs=*/false)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,256]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,14,28,512]")); + + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution(lhs, rhs)), + op::Shape("f32[1,1,256,512]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWindowDilateUneven) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,14,14,512] parameter(0) + %lhs.copy = f32[128,14,14,512] 
copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,7,7,512] parameter(1) + %rhs.copy = f32[128,7,7,512] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[3,3,512,512] convolution(%lhs.copy, %rhs.copy), + window={size=7x7 pad=1_0x1_0 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, + PartitionComputation(hlo_string, /*num_devices=*/2, + /*conv_halo_exchange_always_on_lhs=*/false)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,7,14,512]")); + auto rhs = AllOf( + op::Select(op::Compare(), + op::Copy(op::DynamicSlice( + op::Pad(op::Parameter(), op::Constant()), op::Constant(), + op::Reshape(), op::Constant(), op::Constant())), + op::Broadcast()), + op::Shape("f32[128,4,7,512]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(rhs)), + op::Shape("f32[128,1,7,512]")); + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Convolution( + AllOf(op::DynamicSlice(op::Pad(lhs, op::Constant()), + op::Constant(), op::Subtract(), + op::Constant(), op::Constant()), + op::Shape("f32[128,10,14,512]")), + AllOf(op::Concatenate(left_halo, rhs), + op::Shape("f32[128,5,7,512]")))), + op::Shape("f32[3,3,512,512]"))); +} + +TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWithPadding_HaloOnLhs) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,28,28,128] parameter(0) + %lhs.copy = f32[32,28,28,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,28,28,64] parameter(1) + %rhs.copy = f32[32,28,28,64] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[3,3,128,64] convolution(%lhs.copy, %rhs.copy), + window={size=28x28 pad=1_1x1_1}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,14,28,128]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,14,28,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[32,1,28,128]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[32,1,28,128]")); + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution( + AllOf(op::Concatenate(left_halo, lhs, right_halo), + op::Shape("f32[32,16,28,128]")), + rhs)), + op::Shape("f32[3,3,128,64]"))); +} + +TEST_F(SpmdPartitioningTest, + ConvolutionLhsTiledRhsTiledWindowDilate_HaloOnLhs) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,224,224,3] parameter(0) + %lhs.copy = f32[128,224,224,3] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,112,112,64] parameter(1) + %rhs.copy = f32[128,112,112,64] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[7,7,3,64] convolution(%lhs.copy, %rhs.copy), + window={size=112x112 pad=3_2x3_2 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) 
<< module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,112,224,3]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,56,112,64]")); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,3,224,3]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,2,224,3]")); + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution( + AllOf(op::Concatenate(left_halo, lhs, right_halo), + op::Shape("f32[128,117,224,3]")), + rhs)), + op::Shape("f32[7,7,3,64]"))); +} + +TEST_F(SpmdPartitioningTest, + ConvolutionLhsTiledRhsTiledWindowDilateNegativeRhsPadding_HaloOnLhs) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,56,56,256] parameter(0) + %lhs.copy = f32[128,56,56,256] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,28,28,512] parameter(1) + %rhs.copy = f32[128,28,28,512] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[1,1,256,512] convolution(%lhs.copy, %rhs.copy), + window={size=28x28 pad=0_-1x0_-1 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,28,56,256]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,14,28,512]")); + + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution(op::Slice(lhs), rhs)), + op::Shape("f32[1,1,256,512]"))); +} + +TEST_F(SpmdPartitioningTest, + ConvolutionLhsTiledRhsTiledWindowDilateUneven_HaloOnLhs) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,14,14,512] parameter(0) + %lhs.copy = f32[128,14,14,512] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[128,7,7,512] parameter(1) + %rhs.copy = f32[128,7,7,512] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %conv = f32[3,3,512,512] convolution(%lhs.copy, %rhs.copy), + window={size=7x7 pad=1_0x1_0 rhs_dilate=2x2}, dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[128,7,14,512]")); + auto rhs = AllOf( + op::Select(op::Compare(), + op::Copy(op::DynamicSlice( + op::Pad(op::Parameter(), op::Constant()), op::Constant(), + op::Reshape(), op::Constant(), op::Constant())), + op::Broadcast()), + op::Shape("f32[128,4,7,512]")); + + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[128,1,14,512]")); + EXPECT_THAT( + root, AllOf(op::AllReduce(op::Convolution( + AllOf(op::DynamicSlice( + AllOf(op::Pad(op::Concatenate(lhs, right_halo), + op::Constant()), + op::Shape("f32[128,10,14,512]")), + op::Constant(), op::Reshape(), 
op::Constant(), + op::Constant()), + op::Shape("f32[128,9,14,512]")), + rhs)), + op::Shape("f32[3,3,512,512]"))); +} + +TEST_F(SpmdPartitioningTest, ConcatenateAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[14,257] parameter(0) + %param0.copy = f32[14,257] copy(%param0), sharding={devices=[2,1]0,1} + %param1 = f32[14,116] parameter(1) + %param1.copy = f32[14,116] copy(%param1), sharding={devices=[2,1]0,1} + ROOT %concatenate = f32[14,373] concatenate(%param0.copy, %param1.copy), + dimensions={1}, sharding={devices=[2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant())), + op::Shape("f32[7,257]")); + auto param1 = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant())), + op::Shape("f32[7,116]")); + EXPECT_THAT(root, + AllOf(op::Concatenate(param0, param1), op::Shape("f32[7,373]"))); +} + +TEST_F(SpmdPartitioningTest, ConcatenateAlongPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[14,257] parameter(0) + %param0.copy = f32[14,257] copy(%param0), sharding={devices=[1,2]0,1} + %param1 = f32[14,116] parameter(1) + %param1.copy = f32[14,116] copy(%param1), sharding={devices=[1,2]0,1} + ROOT %concatenate = f32[14,373] concatenate(%param0.copy, %param1.copy), + dimensions={1}, sharding={devices=[1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = + AllOf(op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Constant(), op::Reshape())), + op::Shape("f32[14,129]")); + auto param1 = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), + op::Reshape())), + op::Shape("f32[14,58]")); + EXPECT_THAT(root, AllOf(op::DynamicSlice( + AllOf(op::AllReduce(op::DynamicUpdateSlice( + op::DynamicUpdateSlice( + op::Broadcast(), param0, + op::Constant(), op::Multiply()), + param1, op::Constant(), op::Add())), + op::Shape("f32[14,374]")), + op::Constant(), op::Multiply()), + op::Shape("f32[14,187]"))); +} + +TEST_F(SpmdPartitioningTest, PadAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[128,14,257] parameter(0) + %param0.copy = f32[128,14,257] copy(%param0), sharding={devices=[1,1,2]0,1} + %const = f32[] constant(0) + ROOT %pad = f32[128,17,257] pad(%param0.copy, %const), padding=0_0x1_2x0_0, + sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Constant(), op::Constant(), op::Reshape())), + op::Shape("f32[128,14,129]")); + EXPECT_THAT(root, AllOf(op::Pad(param0, op::Constant()), + op::Shape("f32[128,17,129]"))); +} + +TEST_F(SpmdPartitioningTest, SliceAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[128,14,257] parameter(0) + %param0.copy = f32[128,14,257] copy(%param0), 
sharding={devices=[1,1,2]0,1} + ROOT %slice = f32[128,11,257] slice(%param0.copy), + slice={[0:128:1], [2:13:1], [0:257:1]}, sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Constant(), op::Constant(), op::Reshape())), + op::Shape("f32[128,14,129]")); + EXPECT_THAT(root, AllOf(op::Slice(param0), op::Shape("f32[128,11,129]"))); +} + +TEST_F(SpmdPartitioningTest, SliceAlongPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[128,14,257] parameter(0) + %param0.copy = f32[128,14,257] copy(%param0), sharding={devices=[1,1,2]0,1} + ROOT %slice = f32[63,14,251] slice(%param0.copy), + slice={[2:128:2], [0:14:1], [5:256:1]}, sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Pad(op::Parameter(), op::Constant()), + op::Constant(), op::Constant(), op::Reshape())), + op::Shape("f32[128,14,129]")); + EXPECT_THAT( + root, + AllOf(op::Slice(AllOf( + op::DynamicSlice( + AllOf(op::Concatenate( + param0, + AllOf(op::CollectivePermute(op::Slice(param0)), + op::Shape("f32[128,14,2]"))), + op::Shape("f32[128,14,131]")), + op::Constant(), op::Constant(), op::Add()), + op::Shape("f32[128,14,126]"))), + op::Shape("f32[63,14,126]"))); +} + +TEST_F(SpmdPartitioningTest, SortAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ge { + p.0.lhs.1247 = f32[]{:T(256)} parameter(0), sharding={replicated} + bitcast-convert = s32[]{:T(256)} bitcast-convert(p.0.lhs.1247), sharding={replicated} + constant = s32[]{:T(256)} constant(0), sharding={replicated} + compare = pred[]{:T(256)E(32)} compare(bitcast-convert, constant), direction=LT, sharding={replicated} + constant.1 = u32[]{:T(256)} constant(2147483647), sharding={replicated} + bitcast-convert.1 = u32[]{:T(256)} bitcast-convert(p.0.lhs.1247), sharding={replicated} + subtract = u32[]{:T(256)} subtract(constant.1, bitcast-convert.1), sharding={replicated} + bitcast-convert.2 = s32[]{:T(256)} bitcast-convert(subtract), sharding={replicated} + select = s32[]{:T(256)} select(compare, bitcast-convert.2, bitcast-convert), sharding={replicated} + p.0.rhs.1248 = f32[]{:T(256)} parameter(1), sharding={replicated} + bitcast-convert.3 = s32[]{:T(256)} bitcast-convert(p.0.rhs.1248), sharding={replicated} + compare.1 = pred[]{:T(256)E(32)} compare(bitcast-convert.3, constant), direction=LT, sharding={replicated} + bitcast-convert.4 = u32[]{:T(256)} bitcast-convert(p.0.rhs.1248), sharding={replicated} + subtract.1 = u32[]{:T(256)} subtract(constant.1, bitcast-convert.4), sharding={replicated} + bitcast-convert.5 = s32[]{:T(256)} bitcast-convert(subtract.1), sharding={replicated} + select.1 = s32[]{:T(256)} select(compare.1, bitcast-convert.5, bitcast-convert.3), sharding={replicated} + compare.2 = pred[]{:T(256)E(32)} compare(select, select.1), direction=GT, sharding={replicated} + compare.258 = pred[]{:T(256)E(32)} compare(select.1, select), direction=GT, sharding={replicated} + compare.259 = pred[]{:T(256)E(32)} compare(compare.2, compare.258), direction=EQ, sharding={replicated} + 
p.1.lhs.1249 = s32[]{:T(256)} parameter(2), sharding={replicated} + p.1.rhs.1250 = s32[]{:T(256)} parameter(3), sharding={replicated} + compare.260 = pred[]{:T(256)E(32)} compare(p.1.lhs.1249, p.1.rhs.1250), direction=LT, sharding={replicated} + ROOT select.86 = pred[]{:T(256)E(32)} select(compare.259, compare.260, compare.2), sharding={replicated} +} + +ENTRY entry { + %param0 = f32[128,14,257] parameter(0) + %param0.copy = f32[128,14,257] copy(%param0), sharding={devices=[1,2,1]0,1} + %param1 = s32[128,14,257] parameter(1) + %param1.copy = s32[128,14,257] copy(%param1), sharding={devices=[1,2,1]0,1} + ROOT %sort.6 = (f32[128,14,257]{2,1,0:T(8,128)}, s32[128,14,257]{2,1,0:T(8,128)}) + sort(%param0.copy, %param1.copy), dimensions={2}, is_stable=true, + to_apply=%ge, sharding={{devices=[1,2,1]0,1},{devices=[1,2,1]0,1}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = + AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), + op::Reshape(), op::Constant())), + op::Shape("f32[128,7,257]")); + auto param1 = + AllOf(op::Copy(op::DynamicSlice(op::Parameter(1), op::Constant(), + op::Reshape(), op::Constant())), + op::Shape("s32[128,7,257]")); + EXPECT_THAT(root, AllOf(op::Sort(param0, param1), + op::Shape("(f32[128,7,257], s32[128,7,257])"))); +} + +TEST_F(SpmdPartitioningTest, PartitionCustomCall) { + const char* const hlo_string = R"( +HloModule cluster_2013453984438090939__.47 + +ENTRY %cluster_2013453984438090939__.47 + (arg_tuple.1: ()) -> (bf16[2,2000], s32[2,2000]) { + %arg_tuple.1 = bf16[2,209664] parameter(0) + %copy.arg_tuple.1 = bf16[2,209664] copy(%arg_tuple.1), sharding={devices=[1,2]0,1} + %custom-call = (bf16[2,2000]{1,0}, s32[2,2000]{1,0}) + custom-call(bf16[2,209664]{1,0} %copy.arg_tuple.1), custom_call_target="TopK" + %get-tuple-element = bf16[2,2000]{1,0} + get-tuple-element((bf16[2,2000]{1,0}, s32[2,2000]{1,0}) %custom-call), + index=0, sharding={replicated} + %get-tuple-element.1 = s32[2,2000]{1,0} get-tuple-element((bf16[2,2000]{1,0}, + s32[2,2000]{1,0}) %custom-call), index=1, sharding={replicated} + ROOT %tuple.46 = (bf16[2,2000]{1,0}, s32[2,2000]{1,0}) + tuple(bf16[2,2000]{1,0} %get-tuple-element, s32[2,2000]{1,0} + %get-tuple-element.1), sharding={{replicated}, {replicated}}, + metadata={op_name="XLA_Retvals"} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + auto custom_call = FindInstruction(module.get(), "custom-call.1"); + EXPECT_EQ(custom_call->operand(0)->shape().dimensions(1), 104832); + auto sort = FindInstruction(module.get(), "sort"); + EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 4000); + EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 4000); +} + +TEST_F(SpmdPartitioningTest, ShardableTranspose) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[16,38,38,4] parameter(0) + %param0.copy = f32[16,38,38,4] copy(%param0), sharding={devices=[1,2,1,1]0,1} + ROOT %transpose = f32[16,4,38,38] transpose(%param0.copy), + dimensions={0,3,1,2}, sharding={devices=[1,1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), 
op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[16,19,38,4]")); + EXPECT_THAT(root, AllOf(op::Transpose(param0), op::Shape("f32[16,4,19,38]"))); +} + +TEST_F(SpmdPartitioningTest, MultiDimensionShardedTranspose) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[16,38,38,4] parameter(0) + %param0.copy = f32[16,38,38,4] copy(%param0), + sharding={devices=[4,2,1,1]0,1,2,3,4,5,6,7} + ROOT %transpose = f32[38,4,16,38] transpose(%param0.copy), + dimensions={1,3,0,2}, sharding={devices=[2,1,4,1]0,2,4,6,1,3,5,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[4,19,38,4]")); + EXPECT_THAT(root, AllOf(op::Transpose(param0), op::Shape("f32[19,4,4,38]"))); +} + +TEST_F(SpmdPartitioningTest, NonShardableTranspose) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[16,38,38,4] parameter(0) + %param0.copy = f32[16,38,38,4] copy(%param0), sharding={devices=[1,2,1,1]0,1} + ROOT %transpose = f32[16,4,38,38] transpose(%param0.copy), + dimensions={0,3,1,2}, sharding={devices=[1,2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto resahrd = AllOf(op::Reshape(op::Transpose(op::Reshape(op::AllToAll()))), + op::Shape("f32[16,38,38,2]")); + EXPECT_THAT(root, AllOf(op::Transpose(), op::Shape("f32[16,2,38,38]"))); +} + +TEST_F(SpmdPartitioningTest, ShardableReshape) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[38,38,324] parameter(0) + %param0.copy = f32[38,38,324] copy(%param0), sharding={devices=[2,1,1]0,1} + ROOT %reshape = f32[38,38,4,81] reshape(%param0.copy), + sharding={devices=[2,1,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = + AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[19,38,324]")); + EXPECT_THAT(root, AllOf(op::Reshape(param0), op::Shape("f32[19,38,4,81]"))); +} + +TEST_F(SpmdPartitioningTest, NonShardableReshape) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[38,38,324] parameter(0) + %param0.copy = f32[38,38,324] copy(%param0), sharding={devices=[1,1,2]0,1} + ROOT %transpose = f32[38,38,4,81] reshape(%param0.copy), + sharding={devices=[1,1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::DynamicSlice( + AllOf(op::Pad( + AllOf(op::Reshape(AllOf(op::AllReduce(), + op::Shape("f32[38,38,324]"))), + op::Shape("f32[38,38,4,81]")), + op::Constant()), + op::Shape("f32[38,38,4,82]")), + op::Constant(), op::Constant(), op::Constant(), op::Reshape()), + op::Shape("f32[38,38,4,41]"))); +} + +TEST_F(SpmdPartitioningTest, ReshapeMergeDimsWithHaloExchange) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + 
%input = s32[2,3,7,10] parameter(0), sharding={devices=[1,1,2,1]0,1} + ROOT %reshape = s32[3,2,1,14,5] reshape(%input), + sharding={devices=[1,1,1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto reshape = + AllOf(op::Reshape(op::Parameter(0)), op::Shape("s32[3,2,1,8,5]")); + auto halo = op::CollectivePermute(op::Slice(reshape)); + auto exchanged = + op::DynamicSlice(op::Concatenate(halo, reshape), _, _, _, _, _); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(exchanged, op::Shape("s32[3,2,1,7,5]"))); +} + +// Produces an invalid module after transformation. +TEST_F(SpmdPartitioningTest, InceptionV3_4_way_ReduceWindowDilated) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %param0 = f32[128,5,5,768] parameter(0) + %param0.copy = f32[128,5,5,768] copy(%param0), + sharding={devices=[1,4,1,1]0,1,2,3} + %constant.1 = f32[] constant(0), sharding={replicated} + ROOT %rw = f32[128,17,17,768] reduce-window(%param0.copy, %constant.1), + window={size=1x5x5x1 pad=0_0x4_4x4_4x0_0 lhs_dilate=1x3x3x1}, + to_apply=sum, sharding={devices=[1,4,1,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto input_shard = op::Copy(op::DynamicSlice( + op::Pad(op::Parameter(0), op::Constant()), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())); + auto id_mul4_add1 = + op::Add(op::Multiply(op::Reshape(), op::Constant()), op::Constant()); + auto id_mul5 = op::Multiply(op::Reshape(), op::Constant()); + auto id_mul5_add1_div3 = + op::Divide(op::Add(id_mul5, op::Constant()), op::Constant()); + auto before_masking = AllOf( + op::Shape("f32[128,3,5,768]"), + op::DynamicSlice( + AllOf( + op::Shape("f32[128,4,5,768]"), + op::Concatenate(op::CollectivePermute(input_shard), input_shard)), + op::Constant(), + op::Subtract(op::Constant(), + op::Subtract(id_mul4_add1, id_mul5_add1_div3)), + op::Constant(), op::Constant())); + auto masked = op::Select( + op::And(op::Compare(op::Add(op::Iota(), op::Broadcast(id_mul5_add1_div3)), + op::Broadcast(op::Constant())), + op::Compare(op::Add(op::Iota(), op::Broadcast(id_mul5_add1_div3)), + op::Broadcast(op::Constant()))), + before_masking, op::Broadcast(op::Constant())); + auto rw = AllOf(op::Shape("f32[128,7,17,768]"), + op::ReduceWindow(masked, op::Constant())); + auto final_slice_index = op::Subtract( + id_mul5, + op::Add(op::Multiply(id_mul5_add1_div3, op::Constant()), op::Constant())); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, + AllOf(op::Shape("f32[128,5,17,768]"), + op::DynamicSlice(rw, op::Constant(), final_slice_index, + op::Constant(), op::Constant()))); +} + +TEST_F(SpmdPartitioningTest, TiledToTiledReduce) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %param0 = f32[4,32,32,128] parameter(0) + %param0.copy = f32[4,32,32,128] copy(%param0), + sharding={devices=[1,1,1,2]0,1} + %constant.1 = f32[] constant(0), sharding={replicated} + %reduce = f32[128] reduce(%param0.copy, %constant.1), dimensions={0,1,2}, + to_apply=%sum, sharding={devices=[2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, 
/*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Constant(), + op::Constant(), op::Reshape())), + op::Shape("f32[4,32,32,64]")); + + EXPECT_THAT(root, + AllOf(op::Reduce(param0, op::Constant()), op::Shape("f32[64]"))); +} + +TEST_F(SpmdPartitioningTest, TiledToTiledTupleReduce) { + const char* const hlo_string = R"( +HloModule module + +%minmax_func { + %lhs_value = f32[] parameter(0) + %rhs_value = f32[] parameter(2) + %compare.2 = pred[] compare(%lhs_value, %rhs_value), direction=GT + %select.4 = f32[] select(%compare.2, %lhs_value, %rhs_value) + %lhs_index = s32[] parameter(1) + %rhs_index = s32[] parameter(3) + %select.5 = s32[] select(%compare.2, %lhs_index, %rhs_index) + ROOT %tuple.2 = (f32[], s32[]) tuple(%select.4, %select.5) +} + +ENTRY %main { + %param0 = f32[28,10] parameter(0), sharding={devices=[2,1]0,1} + %param1 = s32[28,10] parameter(1), sharding={devices=[2,1]0,1} + %init0 = f32[] parameter(2) + %init1 = s32[] parameter(3) + ROOT %reduce = (f32[28], s32[28]) reduce(%param0, %param1, %init0, %init1), + dimensions={1}, to_apply=%minmax_func, + sharding={{devices=[2]0,1}, {devices=[2]0,1}} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Reduce(op::Parameter(0), op::Parameter(1), + op::Parameter(2), op::Parameter(3)), + op::Shape("(f32[14], s32[14])"))); +} + +TEST_F(SpmdPartitioningTest, TiledToTiledReduceOutputReshard) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %param0 = f32[4,32,32,128] parameter(0) + %param0.copy = f32[4,32,32,128] copy(%param0), + sharding={devices=[1,2,1,1]0,1} + %constant.1 = f32[] constant(0), sharding={replicated} + %reduce = f32[128] reduce(%param0.copy, %constant.1), dimensions={0,1,2}, + to_apply=%sum, sharding={devices=[2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto param0 = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[4,16,32,128]")); + + EXPECT_THAT(root, + AllOf(op::DynamicSlice( + AllOf(op::AllReduce(op::Reduce(param0, op::Constant())), + op::Shape("f32[128]")), + op::Reshape()), + op::Shape("f32[64]"))); +} + +TEST_F(SpmdPartitioningTest, IotaAlongNonTileDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + ROOT %iota = s32[16,80,91] iota(), iota_dimension=1, + sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Iota(), op::Shape("s32[16,80,46]"))); +} + +TEST_F(SpmdPartitioningTest, IotaAlongTileDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + ROOT %iota = s32[16,80,91] iota(), iota_dimension=2, + sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << 
module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Add(op::Iota(), op::Broadcast()), + op::Shape("s32[16,80,46]"))); +} + +TEST_F(SpmdPartitioningTest, U32IotaAlongTileDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + ROOT %iota = u32[16,80,91] iota(), iota_dimension=2, + sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Add(op::Iota(), op::Broadcast()), + op::Shape("u32[16,80,46]"))); +} + +TEST_F(SpmdPartitioningTest, Conditional) { + const char* const hlo_string = R"( +HloModule module + +Negate { + x = f32[4,5] parameter(0), sharding={replicated} + ROOT negate = f32[4,5] negate(x), sharding={replicated} +} + +Identity { + y = f32[4,5] parameter(0), sharding={devices=[2,1]0,1} + ROOT copy = f32[4,5] copy(y), sharding={devices=[2,1]0,1} +} + +ENTRY entry { + %param.0 = pred[] parameter(0) + %param.0.copy = pred[] copy(%param.0), sharding={maximal device=0} + %param.1 = f32[4,5] parameter(1) + %param.1.copy = f32[4,5] copy(%param.1), sharding={replicated} + %param.2 = f32[4,5] parameter(2) + %param.2.copy = f32[4,5] copy(%param.2), sharding={devices=[2,1]0,1} + ROOT cond = f32[4,5] conditional(%param.0.copy, %param.1.copy, %param.2.copy), + true_computation=Negate, false_computation=Identity, + sharding={devices=[2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto param0 = AllOf(op::Copy(op::Copy(op::Parameter()), op::Shape("pred[]"))); + auto param1 = AllOf(op::Copy(op::Parameter()), op::Shape("f32[4,5]")); + auto param2 = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant())), + op::Shape("f32[2,5]")); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Conditional(op::AllReduce(), param1, param2), + op::Shape("f32[2,5]"))); + + auto then_branch_root = root->branch_computation(0)->root_instruction(); + EXPECT_THAT(then_branch_root, + AllOf(op::DynamicSlice(op::Negate(op::Parameter()), op::Reshape(), + op::Constant()), + op::Shape("f32[2,5]"))); + + auto else_branch_root = root->branch_computation(1)->root_instruction(); + EXPECT_THAT(else_branch_root, + AllOf(op::Copy(op::Parameter()), op::Shape("f32[2,5]"))); +} + +TEST_F(SpmdPartitioningTest, SelectAndScatter_RetinaNet) { + const char* const hlo_string = R"( +HloModule module + +ge { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT compare = pred[] compare(a, b), direction=GE +} + +sum { + c = f32[] parameter(0) + d = f32[] parameter(1) + ROOT add = f32[] add(c, d) +} + +ENTRY entry { + %param.0 = f32[32,128,384,64] parameter(0) + %param.0.copy = f32[32,128,384,64] copy(%param.0), + sharding={devices=[1,8,1,1]0,1,2,3,4,5,6,7} + %param.1 = f32[32,64,192,64] parameter(1) + %param.1.copy = f32[32,64,192,64] copy(%param.1), + sharding={devices=[1,8,1,1]0,1,2,3,4,5,6,7} + constant.1 = f32[] constant(0), sharding={replicated} + ROOT select-and-scatter = f32[32,128,384,64] select-and-scatter(param.0.copy, + %param.1.copy, constant.1), window={size=1x1x1x1 stride=1x2x2x1}, + select=ge, scatter=sum, sharding={devices=[1,8,1,1]0,1,2,3,4,5,6,7} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << 
module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto source = AllOf( + op::Shape("f32[32,8,192,64]"), + op::Copy(op::DynamicSlice(op::Parameter(1), op::Constant(), op::Reshape(), + op::Constant(), op::Constant()))); + auto data = AllOf( + op::Shape("f32[32,16,384,64]"), + op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), op::Reshape(), + op::Constant(), op::Constant()))); + + EXPECT_THAT(root, op::SelectAndScatter(data, source, op::Constant())); + EXPECT_EQ(root->window().dimensions(0).padding_low(), 0); + EXPECT_EQ(root->window().dimensions(0).padding_high(), 0); +} + +TEST_F(SpmdPartitioningTest, TiledDot) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,64] parameter(0) + %lhs.copy = f32[128,64] copy(%lhs), sharding={devices=[1,2]0,1} + %rhs = f32[64,256] parameter(1) + %rhs.copy = f32[64,256] copy(%rhs), sharding={devices=[2,1]0,1} + ROOT %conv = f32[128,256] convolution(%lhs.copy, %rhs.copy), + dim_labels=bf_io->bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, + PartitionComputation(hlo_string, /*num_devices=*/2, + /*conv_halo_exchange_always_on_lhs=*/false)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), + op::Reshape())), + op::Shape("f32[128,32]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant())), + op::Shape("f32[32,256]")); + EXPECT_THAT(root, AllOf(op::AllReduce(op::Convolution(lhs, rhs)), + op::Shape("f32[128,256]"))); +} + +TEST_F(SpmdPartitioningTest, TiledDotOutputTiled) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,64] parameter(0) + %lhs.copy = f32[128,64] copy(%lhs), sharding={devices=[1,2]0,1} + %rhs = f32[64,256] parameter(1) + %rhs.copy = f32[64,256] copy(%rhs), sharding={devices=[2,1]0,1} + ROOT %conv = f32[128,256] convolution(%lhs.copy, %rhs.copy), + dim_labels=bf_io->bf, sharding={devices=[1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), + op::Reshape())), + op::Shape("f32[128,32]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Constant())), + op::Shape("f32[32,256]")); + EXPECT_THAT(root, AllOf(op::DynamicSlice( + AllOf(op::AllReduce(op::Convolution(lhs, rhs)), + op::Shape("f32[128,256]")), + op::Constant(), op::Reshape()), + op::Shape("f32[128,128]"))); +} + +TEST_F(SpmdPartitioningTest, BatchPartitionedConvolution) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[128,256,256] parameter(0) + %lhs.copy = f32[128,256,256] copy(%lhs), sharding={devices=[1,2,1]0,1} + %rhs = f32[256,8,1] parameter(1) + %rhs.copy = f32[256,8,1] copy(%rhs), sharding={replicated} + ROOT %conv = f32[128,256,8] convolution(%lhs.copy, %rhs.copy), + window={size=1}, dim_labels=0bf_io0->0bf, sharding={devices=[1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), + op::Reshape(), op::Constant())), + 
op::Shape("f32[128,128,256]")); + auto rhs = AllOf(op::Copy(op::Parameter(1)), op::Shape("f32[256,8,1]")); + EXPECT_THAT(root, + AllOf(op::Convolution(lhs, rhs), op::Shape("f32[128,128,8]"))); +} + +TEST_F(SpmdPartitioningTest, DotOutputFeaturePartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[24,64] parameter(0) + %lhs.copy = f32[24,64] copy(%lhs), sharding={replicated} + %rhs = f32[39296,64] parameter(1) + %rhs.copy = f32[39296,64] copy(%rhs), sharding={devices=[2,1]0,1} + ROOT %dot = f32[24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("f32[24,64]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(1), op::Reshape(), + op::Constant())), + op::Shape("f32[19648,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs, rhs), op::Shape("f32[24,19648]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumBatchPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64] parameter(0) + %lhs.copy = f32[32,24,64] copy(%lhs), sharding={devices=[2,1,1]0,1} + %rhs = f32[32,39296,64] parameter(1) + %rhs.copy = f32[32,39296,64] copy(%rhs), sharding={devices=[2,1,1]0,1} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[16,24,64]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(1), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[16,39296,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs, rhs), op::Shape("f32[16,24,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumLHSandOutputBatchPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64] parameter(0) + %lhs.copy = f32[32,24,64] copy(%lhs), sharding={devices=[2,1,1]0,1} + %rhs = f32[32,39296,64] parameter(1) + %rhs.copy = f32[32,39296,64] copy(%rhs), sharding={replicated} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[16,24,64]")); + auto rhs = AllOf(op::Copy(op::Parameter(1)), op::Shape("f32[32,39296,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs, op::DynamicSlice(rhs, op::Reshape(), + op::Constant(), + op::Constant())), + op::Shape("f32[16,24,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumRHSandOutputBatchPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + 
%lhs = f32[32,24,64] parameter(0) + %lhs.copy = f32[32,24,64] copy(%lhs), sharding={devices=[1,2,1]0,1} + %rhs = f32[32,39296,64] parameter(1) + %rhs.copy = f32[32,39296,64] copy(%rhs), sharding={devices=[2,1,1]0,1} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), + op::Reshape(), op::Constant())), + op::Shape("f32[32,12,64]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(1), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[16,39296,64]")); + auto lhs_reshard = op::Reshape(op::Transpose(op::AllToAll(op::Reshape(lhs)))); + EXPECT_THAT(root, + AllOf(op::Dot(lhs_reshard, rhs), op::Shape("f32[16,24,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumOutputBatchPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64] parameter(0) + %lhs.copy = f32[32,24,64] copy(%lhs), sharding={replicated} + %rhs = f32[32,39296,64] parameter(1) + %rhs.copy = f32[32,39296,64] copy(%rhs), sharding={replicated} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs_slice = + AllOf(op::DynamicSlice(op::Copy(op::Parameter(0)), op::Reshape(), + op::Constant(), op::Constant()), + op::Shape("f32[16,24,64]")); + auto rhs_slice = + AllOf(op::DynamicSlice(op::Copy(op::Parameter(1)), op::Reshape(), + op::Constant(), op::Constant()), + op::Shape("f32[16,39296,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs_slice, rhs_slice), + op::Shape("f32[16,24,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumContractingDimsPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={devices=[1,1,2,2]0,1,2,3} + %rhs = f32[32,39296,64,128] parameter(1) + %rhs.copy = f32[32,39296,64,128] copy(%rhs), sharding={devices=[1,1,2,2]0,1,2,3} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), + op::Constant(), op::Reshape(), op::Reshape())), + op::Shape("f32[32,24,32,64]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(1), op::Constant(), + op::Constant(), op::Reshape(), op::Reshape())), + op::Shape("f32[32,39296,32,64]")); + EXPECT_THAT(root, AllOf(op::AllReduce(op::Dot(lhs, rhs)), + op::Shape("f32[32,24,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumLHSNonContractingDimsPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = 
f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={devices=[1,2,1,2]0,1,2,3} + %rhs = f32[32,39296,64] parameter(1) + %rhs.copy = f32[32,39296,64] copy(%rhs), sharding={replicated} + ROOT %dot = f32[32,24,128,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[1,2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), op::Reshape(), + op::Constant(), op::Reshape())), + op::Shape("f32[32,12,64,64]")); + auto rhs = AllOf(op::Copy(op::Parameter(1)), op::Shape("f32[32,39296,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs, rhs), op::Shape("f32[32,12,64,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumRHSNonContractingDimsPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64] parameter(0) + %lhs.copy = f32[32,24,64] copy(%lhs), sharding={replicated} + %rhs = f32[32,39296,64,128] parameter(1) + %rhs.copy = f32[32,39296,64,128] copy(%rhs), sharding={devices=[1,2,1,2]0,1,2,3} + ROOT %dot = f32[32,24,39296,128] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[1,1,2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("f32[32,24,64]")); + auto rhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(1), op::Constant(), op::Reshape(), + op::Constant(), op::Reshape())), + op::Shape("f32[32,19648,64,64]")); + EXPECT_THAT(root, AllOf(op::Dot(lhs, rhs), op::Shape("f32[32,24,19648,64]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumOutputLHSNonContractingDimPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={replicated} + %rhs = f32[32,39296,64,128] parameter(1) + %rhs.copy = f32[32,39296,64,128] copy(%rhs), sharding={replicated} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("f32[32,24,64,128]")); + auto rhs = + AllOf(op::Copy(op::Parameter(1)), op::Shape("f32[32,39296,64,128]")); + EXPECT_THAT( + root, + AllOf(op::Dot(AllOf(op::DynamicSlice(lhs, op::Constant(), op::Reshape(), + op::Constant(), op::Constant()), + op::Shape("f32[32,12,64,128]")), + rhs), + op::Shape("f32[32,12,39296]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumOutputRHSNonContractingDimPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={replicated} + %rhs = f32[32,39296,64,128] parameter(1) + %rhs.copy = f32[32,39296,64,128] copy(%rhs), sharding={replicated} + ROOT 
%dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("f32[32,24,64,128]")); + auto rhs = + AllOf(op::Copy(op::Parameter(1)), op::Shape("f32[32,39296,64,128]")); + EXPECT_THAT(root, + AllOf(op::Dot(lhs, AllOf(op::DynamicSlice( + rhs, op::Constant(), op::Reshape(), + op::Constant(), op::Constant()), + op::Shape("f32[32,19648,64,128]"))), + op::Shape("f32[32,24,19648]"))); +} + +TEST_F(SpmdPartitioningTest, EinsumRHSWindowedNonContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,39295,64,128] parameter(1) + %rhs.copy = f32[32,39295,64,128] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + ROOT %dot = f32[32,24,39295] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, + /*num_devices=*/2)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,12,64,128]")); + auto rhs = + AllOf(op::Copy(op::DynamicSlice(op::Pad(op::Parameter(1), op::Constant()), + op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,19648,64,128]")); + EXPECT_THAT( + root, + AllOf(op::Slice(AllOf(op::GetTupleElement(op::While(op::Tuple( + lhs, rhs, op::Broadcast(), op::Constant()))), + op::Shape("f32[32,12,39296]"))), + op::Shape("f32[32,12,39295]"))); + auto while_loop = root->operand(0)->operand(0); + // Check loop condition. + EXPECT_THAT( + while_loop->while_condition()->root_instruction(), + op::Compare(op::GetTupleElement(op::Parameter(0)), op::Constant())); + + // Check loop body. + auto next_i = op::Add(op::GetTupleElement(op::Parameter(0)), op::Constant()); + auto window = op::Conditional(op::Compare(next_i, op::Constant()), + op::GetTupleElement(op::Parameter(0)), + op::GetTupleElement(op::Parameter(0))); + auto partial_output = op::Dot(op::GetTupleElement(op::Parameter(0)), + op::GetTupleElement(op::Parameter(0))); + EXPECT_THAT( + while_loop->while_body()->root_instruction(), + op::Tuple(op::GetTupleElement(op::Parameter(0)), window, + op::DynamicUpdateSlice(op::GetTupleElement(op::Parameter(0)), + partial_output, op::Constant(), + op::Constant(), op::Reshape()), + next_i)); + + // Check the conditional that contains the collective permute. 
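+  // (This conditional is what rotates the RHS shard: the windowed einsum
+  // loop dots the local LHS shard with the RHS shard it currently holds,
+  // accumulates the partial result via dynamic-update-slice, and then
+  // collective-permutes the RHS shard to a neighboring partition so the
+  // next iteration sees a different slice of the RHS.)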
+ auto cp_conditional = + while_loop->while_body()->root_instruction()->operand(1); + EXPECT_THAT(cp_conditional->true_computation()->root_instruction(), + op::CollectivePermute(op::Parameter(0))); + EXPECT_THAT(cp_conditional->false_computation()->root_instruction(), + op::Parameter(0)); +} + +TEST_F(SpmdPartitioningTest, EinsumRHSWindowedContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,24,63,128] parameter(0) + %lhs.copy = f32[32,24,63,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,39296,63,128] parameter(1) + %rhs.copy = f32[32,39296,63,128] copy(%rhs), sharding={devices=[1,1,2,1]0,1} + ROOT %dot = f32[32,24,39296] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, + /*num_devices=*/2)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(0), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[32,12,63,128]")); + auto rhs = + AllOf(op::Copy(op::DynamicSlice(op::Pad(op::Parameter(1), op::Constant()), + op::Constant(), op::Constant(), + op::Reshape(), op::Constant())), + op::Shape("f32[32,39296,32,128]")); + auto masked_rhs = + op::Select(op::Compare(), rhs, op::Broadcast(op::Constant())); + EXPECT_THAT(root, + AllOf(op::GetTupleElement(op::While(op::Tuple( + lhs, masked_rhs, op::Broadcast(), op::Constant()))), + op::Shape("f32[32,12,39296]"))); + auto while_loop = root->operand(0); + // Check loop condition. + EXPECT_THAT( + while_loop->while_condition()->root_instruction(), + op::Compare(op::GetTupleElement(op::Parameter(0)), op::Constant())); + + // Check loop body. + auto next_i = op::Add(op::GetTupleElement(op::Parameter(0)), op::Constant()); + auto window = op::Conditional(op::Compare(next_i, op::Constant()), + op::GetTupleElement(op::Parameter(0)), + op::GetTupleElement(op::Parameter(0))); + auto partial_output = op::Dot( + op::DynamicSlice( + op::Pad(op::GetTupleElement(op::Parameter(0)), op::Constant()), + op::Constant(), op::Constant(), op::Reshape(), op::Constant()), + op::GetTupleElement(op::Parameter(0))); + EXPECT_THAT( + while_loop->while_body()->root_instruction(), + op::Tuple(op::GetTupleElement(op::Parameter(0)), window, + op::Add(op::GetTupleElement(op::Parameter(0)), partial_output), + next_i)); + + // Check the conditional that contains the collective permute. 
+ auto cp_conditional = + while_loop->while_body()->root_instruction()->operand(1); + EXPECT_THAT(cp_conditional->true_computation()->root_instruction(), + op::CollectivePermute(op::Parameter(0))); + EXPECT_THAT(cp_conditional->false_computation()->root_instruction(), + op::Parameter(0)); +} + +TEST_F(SpmdPartitioningTest, EinsumRHSWindowedNonContractingReduce1) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,39295,64,128] parameter(1) + %rhs.copy = f32[32,39295,64,128] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + %dot = f32[32,24,39295] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} + %constant = f32[] constant(0) + %constant.1 = f32[] constant(2) + %broadcast = f32[32,24,39295] broadcast(%constant.1), dimensions={}, + sharding={devices=[1,2,1]0,1} + %multiply = f32[32,24,39295] multiply(%dot, %broadcast), + sharding={devices=[1,2,1]0,1} + ROOT %reduce = f32[32,24] reduce(%multiply, %constant), dimensions={2}, + to_apply=sum, sharding={devices=[1,2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, + /*num_devices=*/2)); + VLOG(1) << module->ToString(); + // Involves loop code motion, skips pattern matching. +} + +TEST_F(SpmdPartitioningTest, EinsumRHSWindowedNonContractingReduce2) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %lhs = f32[32,24,64,128] parameter(0) + %lhs.copy = f32[32,24,64,128] copy(%lhs), sharding={devices=[1,2,1,1]0,1} + %rhs = f32[32,39295,64,128] parameter(1) + %rhs.copy = f32[32,39295,64,128] copy(%rhs), sharding={devices=[1,2,1,1]0,1} + %dot = f32[32,24,39295] dot(%lhs.copy, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} + %constant = f32[] constant(0) + %constant.1 = f32[] constant(2) + %broadcast = f32[32,24,39295] broadcast(%constant.1), dimensions={}, + sharding={devices=[1,2,1]0,1} + %multiply = f32[32,24,39295] multiply(%dot, %broadcast), + sharding={devices=[1,2,1]0,1} + ROOT %reduce = f32[32,39295] reduce(%multiply, %constant), dimensions={1}, + to_apply=sum, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, + /*num_devices=*/2)); + VLOG(1) << module->ToString(); + // Involves loop code motion, skips pattern matching. 
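+  // (The TF_ASSERT_OK_AND_ASSIGN above is the effective check: presumably
+  // the multiply/reduce consumers get moved into the windowed-dot loop, so
+  // the exact partitioned HLO is not worth pattern-matching here.)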
+} + +TEST_F(SpmdPartitioningTest, EinsumRHSWindowedContractingFromBroadcast) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %rhs = f32[32,39296,63,128] parameter(0) + %rhs.copy = f32[32,39296,63,128] copy(%rhs), sharding={devices=[1,1,2,1]0,1} + %constant.1 = f32[] constant(2) + %broadcast = f32[32,24,63,128] broadcast(%constant.1), dimensions={}, + sharding={devices=[1,2,1,1]0,1} + %add = f32[32,24,63,128] add(%broadcast, %broadcast), + sharding={devices=[1,2,1,1]0,1} + ROOT %dot = f32[32,24,39296] dot(%add, %rhs.copy), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2,3}, rhs_contracting_dims={2,3}, + sharding={devices=[1,2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, + /*num_devices=*/2)); + VLOG(1) << module->ToString(); + // Involves loop code motion, skips pattern matching. +} + +TEST_F(SpmdPartitioningTest, ReplicatedRng) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = s32[] parameter(0) + %lhs.copy = s32[] copy(%lhs), sharding={replicated} + %rhs = s32[] parameter(1) + %rhs.copy = s32[] copy(%rhs), sharding={replicated} + ROOT %rng = s32[4]{0} rng(%lhs.copy, %rhs.copy), + distribution=rng_uniform, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("s32[]")); + auto rhs = AllOf(op::Copy(op::Parameter(1)), op::Shape("s32[]")); + EXPECT_THAT( + root, + AllOf(op::AllReduce(op::Select( + op::Broadcast(op::Compare(op::PartitionId(), op::Constant())), + op::Rng(), op::Broadcast(op::Constant()))), + op::Shape("s32[4]"))); +} + +TEST_F(SpmdPartitioningTest, PartitionedRng) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = s32[] parameter(0) + %lhs.copy = s32[] copy(%lhs), sharding={replicated} + %rhs = s32[] parameter(1) + %rhs.copy = s32[] copy(%rhs), sharding={maximal device=1} + ROOT %rng = s32[4]{0} rng(%lhs.copy, %rhs.copy), + distribution=rng_uniform, sharding={devices=[2]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::Parameter(0)), op::Shape("s32[]")); + auto rhs = AllOf(op::Copy(op::Copy(op::Parameter(1))), op::Shape("s32[]")); + EXPECT_THAT(root, AllOf(op::Rng(lhs, op::AllReduce(op::Select( + op::Broadcast(op::Compare()), rhs, + op::Broadcast(op::Constant())))), + op::Shape("s32[2]"))); +} + +TEST_F(SpmdPartitioningTest, DynamicSliceAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %input = s32[128,64] parameter(0) + %input.copy = s32[128,64] copy(%input), sharding={devices=[2,1]0,1} + %index = s32[] parameter(1) + %constant = s32[] constant(0) + ROOT %dynamic-slice = s32[128,2] dynamic-slice(%input.copy, %constant, %index), + dynamic_slice_sizes={128,2}, sharding={devices=[2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto input = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Reshape(), + op::Constant())), + op::Shape("s32[64,64]")); + EXPECT_THAT(root, + AllOf(op::DynamicSlice(input, 
op::Constant(), op::Parameter(1)), + op::Shape("s32[64,2]"))); +} + +TEST_F(SpmdPartitioningTest, DynamicUpdateSliceAlongNonPartitionedDimension) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %input = s32[128,64] parameter(0) + %input.copy = s32[128,64] copy(%input), sharding={devices=[2,1]0,1} + %index = s32[] parameter(1) + %constant = s32[] constant(0) + %update = s32[128,2] parameter(2) + %update.copy = s32[128,2] copy(%update), sharding={devices=[2,1]0,1} + ROOT %dynamic-update-slice = s32[128,64] + dynamic-update-slice(%input.copy, %update.copy, %constant, %index), + sharding={devices=[2,1]0,1} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto input = AllOf(op::Copy(op::DynamicSlice(op::Parameter(0), op::Reshape(), + op::Constant())), + op::Shape("s32[64,64]")); + auto update = AllOf(op::Copy(op::DynamicSlice(op::Parameter(2), op::Reshape(), + op::Constant())), + op::Shape("s32[64,2]")); + EXPECT_THAT(root, AllOf(op::DynamicUpdateSlice(input, update, op::Constant(), + op::Parameter(1)), + op::Shape("s32[64,64]"))); +} + +TEST_F(SpmdPartitioningTest, PassthroughGather) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={devices=[1,2]0,1} + %indices = s32[3] parameter(1), sharding={replicated} + ROOT %gather = f32[3,9] gather(%input, %indices), offset_dims={1}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=1, + slice_sizes={1,9}, sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Gather(op::Parameter(0), op::Parameter(1)), + op::Shape("f32[3,5]"))); +} + +TEST_F(SpmdPartitioningTest, GatherPartitionedOnTrivialSliceDims) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %input = f32[17,9] parameter(0), sharding={devices=[2,1]0,1} + %indices = s32[2,3] parameter(1), sharding={replicated} + ROOT %gather = f32[2,3,9] gather(%input, %indices), offset_dims={2}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=2, + slice_sizes={1,9}, sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + auto offset = op::Reshape( + op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + auto min = AllOf(op::Broadcast(offset), op::Shape("s32[2,3]")); + auto max = AllOf(op::Broadcast(op::Add(offset, op::Constant())), + op::Shape("s32[2,3]")); + auto clamp = op::Clamp(min, op::Parameter(1), max); + auto gather = op::Gather(op::Parameter(0), op::Subtract(clamp, min)); + auto mask = + op::Or(op::Lt(op::Parameter(1), min), op::Gt(op::Parameter(1), max)); + auto masked = + op::Select(op::Broadcast(mask), op::Broadcast(op::Constant()), gather); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::AllReduce(masked), op::Shape("f32[2,3,9]"))); +} + +TEST_F(SpmdPartitioningTest, PassthroughScatter) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] 
parameter(0), sharding={devices=[1,2]0,1} + %indices = s32[3] parameter(1), sharding={replicated} + %updates = f32[3,9] parameter(2), sharding={devices=[1,2]0,1} + ROOT %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1, sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Scatter(op::Parameter(0), op::Parameter(1), + op::Parameter(2)), + op::Shape("f32[2,5]"))); +} + +TEST_F(SpmdPartitioningTest, ScatterPartitionedOnTrivialSliceDims) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[17,9] parameter(0), sharding={devices=[2,1]0,1} + %indices = s32[2,3] parameter(1), sharding={replicated} + %updates = f32[2,3,9] parameter(2), sharding={replicated} + ROOT %scatter = f32[17,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={2}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=2, sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + auto offset = op::Reshape( + op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + auto indices = op::Subtract( + op::Parameter(1), AllOf(op::Broadcast(offset), op::Shape("s32[2,3]"))); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, + AllOf(op::Scatter(op::Parameter(0), indices, op::Parameter(2)), + op::Shape("f32[9,9]"))); +} + +TEST_F(SpmdPartitioningTest, TiledReverse) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[3,3]{1,0} constant({{1,1,1},{1,1,1},{1,1,1}}), + sharding={devices=[2,1]0,1} + ROOT reverse = f32[3,3]{1,0} reverse(constant), dimensions={1}, + sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,3]{1,0}"), + op::Reverse(op::DynamicSlice( + op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant())))); +} + +TEST_F(SpmdPartitioningTest, MixWithManualPartitioning) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + param = f32[8,2] parameter(0), sharding={devices=[2,1]0,1} + to_shard = f32[4,2] custom-call(param), custom_call_target="SPMDFullToShardShape", sharding={replicated} + add = f32[4,2] add(to_shard, to_shard), sharding={replicated} + to_full = f32[8,2] custom-call(add), custom_call_target="SPMDShardToFullShape", sharding={devices=[2,1]0,1} + ROOT mul = f32[8,2] multiply(to_full, param), sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + auto to_shard = op::Copy(op::Parameter(0)); + EXPECT_THAT(root, AllOf(op::Shape("f32[4,2]"), + op::Multiply(op::Copy(op::Add(to_shard, to_shard)), + op::Parameter(0)))); +} + +} // 
namespace +} // namespace spmd +} // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc new file mode 100644 index 00000000000..207f854cd9f --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -0,0 +1,662 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { +namespace spmd { + +bool HasReplicatedSharding(const HloSharding& sharding) { + if (sharding.IsTuple()) { + return absl::c_any_of(sharding.tuple_elements(), HasReplicatedSharding); + } + return sharding.IsReplicated(); +} + +HloInstruction* CreateZero(const Shape& shape, SpmdBuilder* b) { + if (shape.IsTuple()) { + std::vector elements; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + elements.push_back( + CreateZero(ShapeUtil::GetTupleElementShape(shape, i), b)); + } + return b->AddInstruction(HloInstruction::CreateTuple(elements)); + } + + if (shape.IsToken()) { + return b->AddInstruction(HloInstruction::CreateToken()); + } + auto zero = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(shape.element_type()))); + return b->AddInstruction(HloInstruction::CreateBroadcast(shape, zero, {})); +} + +HloComputation* MakeBinaryAdd(PrimitiveType type, HloModule* module) { + HloComputation::Builder sum_b("add"); + auto x = sum_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, ShapeUtil::MakeShape(type, {}), "x")); + auto y = sum_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/1, ShapeUtil::MakeShape(type, {}), "y")); + if (type == PRED) { + sum_b.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(type, {}), HloOpcode::kOr, x, y)); + } else { + sum_b.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(type, {}), HloOpcode::kAdd, x, y)); + } + HloComputation* reduction = module->AddEmbeddedComputation(sum_b.Build()); + return reduction; +} + +bool EvenlyPartitions(const Shape& shape, const HloSharding& sharding) { + if (sharding.IsTuple()) { + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + if (!EvenlyPartitions(ShapeUtil::GetTupleElementShape(shape, i), + sharding.GetSubSharding(shape, {i}))) { + return false; + } + } + } + + if (sharding.IsTileMaximal()) { + return 
sharding.IsReplicated(); + } + for (int64 i = 0; i < shape.dimensions_size(); ++i) { + if (shape.dimensions(i) % sharding.tile_assignment().dim(i) != 0) { + return false; + } + } + return true; +} + +Shape MakePartitionedShape(const Shape& shape, const HloSharding& sharding) { + if (sharding.IsTuple()) { + std::vector subshapes; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + subshapes.push_back( + MakePartitionedShape(ShapeUtil::GetTupleElementShape(shape, i), + sharding.GetSubSharding(shape, {i}))); + } + return ShapeUtil::MakeTupleShape(subshapes); + } + return sharding.TileShape(shape); +} + +Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, + const HloSharding& sharding, + int64 partition_id) { + if (sharding.IsTuple()) { + std::vector subshapes; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + subshapes.push_back(MakeNonPaddedShapeForGivenPartition( + ShapeUtil::GetTupleElementShape(shape, i), + sharding.GetSubSharding(shape, {i}), partition_id)); + } + return ShapeUtil::MakeTupleShape(subshapes); + } + + auto partition_shape = shape; + std::vector tile_offset = + sharding.TileOffsetForDevice(shape, partition_id); + std::vector tile_limit = + sharding.TileLimitForDevice(shape, partition_id); + for (int64 i = 0; i < tile_offset.size(); ++i) { + if (sharding.UsesDevice(partition_id)) { + partition_shape.set_dimensions(i, tile_limit[i] - tile_offset[i]); + } else { + partition_shape.set_dimensions(i, 0); + } + } + return partition_shape; +} + +std::vector MakePartitionOffsets(const Shape& shape, + const HloSharding& sharding, + HloInstruction* partition_id, + SpmdBuilder* b) { + CHECK(!shape.IsTuple()); + + Array2D offset_array( + {sharding.tile_assignment().num_elements(), shape.rank()}); + offset_array.Each([&](int64 i, int64 j, int32* value) { + *value = sharding.TileOffsetForDevice(shape, i)[j]; + }); + auto offset_table = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR2FromArray2D(offset_array))); + std::vector offsets; + for (int64 i = 0; i < shape.rank(); ++i) { + if (sharding.tile_assignment().dim(i) == 1) { + offsets.push_back(b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32)))); + } else { + auto index = b->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(S32, {1, 1}), offset_table, + {partition_id, b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(i)))}, + {1, 1})); + offsets.push_back(b->AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), index))); + } + } + return offsets; +} + +std::vector MakeTiledPartitionOrdinals( + const HloSharding& sharding, HloInstruction* partition_id, SpmdBuilder* b) { + CHECK(!sharding.IsTileMaximal()); + auto table_shape = + ShapeUtil::MakeShape(S32, sharding.tile_assignment().dimensions()); + return MakePartitionOffsets(table_shape, sharding, partition_id, b); +} + +HloInstruction* PadToShape(HloInstruction* hlo, const Shape& padded_shape, + SpmdBuilder* b, HloComputation* computation) { + CHECK(b == nullptr || computation == nullptr); + if (ShapeUtil::Compatible(hlo->shape(), padded_shape)) { + return hlo; + } + PaddingConfig padding_config; + for (int64 i = 0; i < padded_shape.rank(); ++i) { + auto padding_config_dim = padding_config.add_dimensions(); + padding_config_dim->set_edge_padding_low(0); + padding_config_dim->set_interior_padding(0); + padding_config_dim->set_edge_padding_high(padded_shape.dimensions(i) - + hlo->shape().dimensions(i)); + } + auto add_hlo 
= [&](std::unique_ptr to_add) { + if (b == nullptr) { + return computation->AddInstruction(std::move(to_add)); + } + return b->AddInstruction(std::move(to_add)); + }; + auto zero = add_hlo(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + return add_hlo( + HloInstruction::CreatePad(padded_shape, hlo, zero, padding_config)); +} + +Shape GetPaddedShapeForUnevenPartitioning(const Shape& base_shape, + const HloSharding& sharding) { + if (sharding.IsTileMaximal()) { + return base_shape; + } + if (EvenlyPartitions(base_shape, sharding)) { + return base_shape; + } + auto shard_shape = MakePartitionedShape(base_shape, sharding); + Shape padded_base_shape = base_shape; + for (int64 i = 0; i < padded_base_shape.rank(); ++i) { + padded_base_shape.set_dimensions( + i, shard_shape.dimensions(i) * sharding.tile_assignment().dim(i)); + } + return padded_base_shape; +} + +HloInstruction* PadBaseShapeBeforeUnevenTiledSharding( + HloInstruction* hlo, const HloSharding& sharding, SpmdBuilder* b) { + auto padded_base_shape = + GetPaddedShapeForUnevenPartitioning(hlo->shape(), sharding); + if (ShapeUtil::Compatible(padded_base_shape, hlo->shape())) { + return hlo; + } + return PadToShape(hlo, padded_base_shape, b); +} + +absl::optional UniqueTiledDim(const HloSharding& sharding) { + if (sharding.IsTileMaximal()) { + return absl::nullopt; + } + int64 dim = -1; + for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { + if (sharding.tile_assignment().dim(i) > 1) { + if (dim != -1) { + return absl::nullopt; + } + dim = i; + } + } + CHECK_NE(dim, -1); + return dim; +} + +MultiplyAddDivideOffsetCalculation::MultiplyAddDivideOffsetCalculation( + int64 multiplier, int64 offset, int64 divisor) + : multiplier_(multiplier), offset_(offset), divisor_(divisor) { + CHECK_GT(divisor_, 0); + Simplify(); +} + +OffsetCalculation MultiplyAddDivideOffsetCalculation::operator-( + const MultiplyAddDivideOffsetCalculation& other) const { + if (divisor_ == 1 && other.divisor_ == 1) { + return OffsetCalculation(MultiplyAddDivideOffsetCalculation( + multiplier_ - other.multiplier_, offset_ - other.offset_, 1)); + } + return OffsetCalculation(HloOpcode::kSubtract, *this, other); +} + +void MultiplyAddDivideOffsetCalculation::Simplify() { + // We could simplify the calculation when multiplier is a multiple of + // divisor_. However, when offset_ is not a multiple of divisor_, we must + // make sure that offset_ and multiplier_ are both non-negative or both + // non-positive. E.g., (3 * i - 1) / 3 is not equivalent to i or i - 1. 
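+  // Concretely (illustrative values): with multiplier_ = 3, offset_ = -1,
+  // divisor_ = 3, shard ordinal 0 maps to (3 * 0 - 1) / 3 = 0 under C++
+  // truncating division (so the result is not i - 1), while ordinal 1 maps
+  // to (3 * 1 - 1) / 3 = 0 (so it is not i either); hence the guard below
+  // also requires offset_ % divisor_ == 0 || offset_ * multiplier_ > 0.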
+ if (divisor_ != 1 && multiplier_ % divisor_ == 0 && + (offset_ % divisor_ == 0 || offset_ * multiplier_ > 0)) { + multiplier_ /= divisor_; + offset_ /= divisor_; + divisor_ = 1; + } +} + +int64 MultiplyAddDivideOffsetCalculation::Calculate(int64 shard_ordinal) const { + return (shard_ordinal * multiplier_ + offset_) / divisor_; +} + +HloInstruction* MultiplyAddDivideOffsetCalculation::Calculate( + HloInstruction* shard_ordinal, SpmdBuilder* b) const { + auto scalar_shape = ShapeUtil::MakeShape(S32, {}); + if (multiplier_ == 0) { + return b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(offset_ / divisor_))); + } + HloInstruction* result = shard_ordinal; + if (multiplier_ != 1) { + result = b->AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kMultiply, shard_ordinal, + b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(multiplier_))))); + } + if (offset_ != 0) { + auto offset = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(offset_))); + result = b->AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, result, offset)); + } + if (divisor_ != 1) { + auto divisor = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(divisor_))); + result = b->AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kDivide, result, divisor)); + } + return result; +} + +int64 MultiplyAddDivideOffsetCalculation::MaxInRange( + int64 start_ordinal, int64 limit_ordinal) const { + int64 max = Calculate(start_ordinal); + for (int64 i = start_ordinal + 1; i < limit_ordinal; ++i) { + max = std::max(max, Calculate(i)); + } + return max; +} + +OffsetCalculation& OffsetCalculation::operator=( + const OffsetCalculation& other) { + opcode_ = other.opcode_; + copy_from_ = other.copy_from_; + if (opcode_ != HloOpcode::kCopy) { + lhs_ = absl::make_unique(*other.lhs_); + rhs_ = absl::make_unique(*other.rhs_); + } + return *this; +} + +bool OffsetCalculation::IsConstant() const { + if (opcode_ == HloOpcode::kCopy) { + return copy_from_.IsConstant(); + } + if (opcode_ == HloOpcode::kSubtract && *lhs_ == *rhs_) { + return true; + } + return lhs_->IsConstant() && rhs_->IsConstant(); +} + +OffsetCalculation OffsetCalculation::operator-( + const OffsetCalculation& other) const { + if (opcode_ == HloOpcode::kCopy && other.opcode_ == HloOpcode::kCopy) { + return copy_from_ - other.copy_from_; + } + return OffsetCalculation(HloOpcode::kSubtract, *this, other); +} + +bool OffsetCalculation::operator==(const OffsetCalculation& other) const { + if (opcode_ != other.opcode_) { + return false; + } + if (opcode_ == HloOpcode::kCopy) { + return copy_from_ == other.copy_from_; + } + return *lhs_ == *other.lhs_ && *rhs_ == *other.rhs_; +} + +int64 OffsetCalculation::Calculate(int64 shard_ordinal) const { + switch (opcode_) { + case HloOpcode::kCopy: + return copy_from_.Calculate(shard_ordinal); + case HloOpcode::kSubtract: + return lhs_->Calculate(shard_ordinal) - rhs_->Calculate(shard_ordinal); + case HloOpcode::kMultiply: + return lhs_->Calculate(shard_ordinal) * rhs_->Calculate(shard_ordinal); + default: + LOG(FATAL) << "Should not happen"; + } +} + +HloInstruction* OffsetCalculation::Calculate(HloInstruction* shard_ordinal, + SpmdBuilder* b) const { + if (opcode_ == HloOpcode::kCopy) { + return copy_from_.Calculate(shard_ordinal, b); + } + auto lhs = lhs_->Calculate(shard_ordinal, b); + auto rhs = rhs_->Calculate(shard_ordinal, b); + return b->AddInstruction( + 
HloInstruction::CreateBinary(lhs->shape(), opcode_, lhs, rhs)); +} + +int64 OffsetCalculation::MaxInRange(int64 start_ordinal, + int64 limit_ordinal) const { + if (IsConstant()) { + return Calculate(start_ordinal); + } + if (opcode_ == HloOpcode::kCopy) { + return std::max(Calculate(start_ordinal), Calculate(limit_ordinal - 1)); + } + int64 max = Calculate(start_ordinal); + for (int64 i = start_ordinal + 1; i < limit_ordinal; ++i) { + max = std::max(max, Calculate(i)); + } + return max; +} + +absl::optional ExchangeHalo( + HloInstruction* hlo, const OffsetCalculation& left_halo_size_function, + const OffsetCalculation& right_halo_size_function, int64 dim, + const HloSharding& target, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b) { + int64 input_shard_size = hlo->shape().dimensions(dim); + int64 shard_count = target.tile_assignment().dim(dim); + + std::vector concat_pieces; + + int64 max_left_halo_size = left_halo_size_function.MaxInRange(1, shard_count); + if (max_left_halo_size > input_shard_size) { + VLOG(1) << "ExchangeHalo failed: halo is beyond the left neighbor."; + return absl::nullopt; + } + if (max_left_halo_size > 0) { + std::vector> source_target_pairs; + target.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + if (indices[dim] > 0) { + std::vector source_indices(indices.begin(), indices.end()); + source_indices[dim] -= 1; + source_target_pairs.emplace_back( + target.tile_assignment()(source_indices), device); + } + }); + auto halo_shape = hlo->shape(); + auto source_halo_slice = hlo; + if (max_left_halo_size != hlo->shape().dimensions(dim)) { + halo_shape.set_dimensions(dim, max_left_halo_size); + std::vector halo_start_indices(halo_shape.rank(), 0); + halo_start_indices[dim] = + hlo->shape().dimensions(dim) - max_left_halo_size; + std::vector halo_slice_strides(halo_shape.rank(), 1); + + source_halo_slice = b->AddInstruction( + hlo->CreateSlice(halo_shape, hlo, halo_start_indices, + hlo->shape().dimensions(), halo_slice_strides)); + } + auto left_halo = + collective_ops_creator.create_cross_partition_collective_permute( + b, source_halo_slice, source_target_pairs, (*next_channel_id)++); + concat_pieces.push_back(left_halo); + } + + concat_pieces.push_back(hlo); + + // Right halo. + int64 max_right_halo_size = + right_halo_size_function.MaxInRange(0, shard_count - 1); + if (max_right_halo_size > input_shard_size) { + VLOG(1) << "ExchangeHalo failed: halo is beyond the right neighbor."; + return absl::nullopt; + } + if (max_right_halo_size > 0) { + std::vector> source_target_pairs; + target.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + if (indices[dim] > 0) { + std::vector target_indices(indices.begin(), indices.end()); + target_indices[dim] -= 1; + source_target_pairs.emplace_back( + device, target.tile_assignment()(target_indices)); + } + }); + auto halo_shape = hlo->shape(); + halo_shape.set_dimensions(dim, max_right_halo_size); + std::vector halo_start_indices(halo_shape.rank(), 0); + std::vector halo_slice_strides(halo_shape.rank(), 1); + + auto source_halo_slice = b->AddInstruction( + hlo->CreateSlice(halo_shape, hlo, halo_start_indices, + halo_shape.dimensions(), halo_slice_strides)); + auto right_halo = + collective_ops_creator.create_cross_partition_collective_permute( + b, source_halo_slice, source_target_pairs, (*next_channel_id)++); + concat_pieces.push_back(right_halo); + } + + auto concat = hlo; + // Concat with halos/padding. 
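+  // The pieces are, in order: the left halo received from the previous shard
+  // (if any), this shard's own data, and the right halo received from the
+  // next shard (if any). For example, a shard size of 4 with maximum halo
+  // sizes of 1 on each side yields a concat of size 6 along `dim`.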
+ if (concat_pieces.size() > 1) { + auto concat_shape = hlo->shape(); + int64 concat_dim_size = 0; + for (auto piece : concat_pieces) { + concat_dim_size += piece->shape().dimensions(dim); + } + concat_shape.set_dimensions(dim, concat_dim_size); + concat = b->AddInstruction( + HloInstruction::CreateConcatenate(concat_shape, concat_pieces, dim)); + } + + return concat; +} + +absl::optional ExchangeHalo( + HloInstruction* hlo, + std::vector left_halo_size_functions, + std::vector right_halo_size_functions, + const HloSharding& target, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b) { + CHECK(left_halo_size_functions.size() == hlo->shape().rank()); + CHECK(right_halo_size_functions.size() == hlo->shape().rank()); + + HloInstruction* visiting_hlo = hlo; + for (int dim = 0; dim < hlo->shape().rank(); ++dim) { + auto concat = ExchangeHalo(visiting_hlo, left_halo_size_functions[dim], + right_halo_size_functions[dim], dim, target, + collective_ops_creator, next_channel_id, b); + if (!concat) { + return absl::nullopt; + } + visiting_hlo = *concat; + } + return visiting_hlo; +} + +absl::optional ExchangeHaloAndGetValidData( + HloInstruction* hlo, const Shape& base_shape, + const OffsetCalculation& left_halo_size_function, + const OffsetCalculation& right_halo_size_function, + int64 explicit_left_padding_on_full_shape, int64 padded_full_shape_size, + int64 shard_size_with_halo, int64 dim, const HloSharding& target, + HloInstruction* offset_on_padded_shape, HloInstruction* pad_value, + HloInstruction* partition_ordinal, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b, bool mask_invalid_region) { + auto halo_exchange_result = + ExchangeHalo(hlo, left_halo_size_function, right_halo_size_function, dim, + target, collective_ops_creator, next_channel_id, b); + if (!halo_exchange_result) { + return absl::nullopt; + } + auto concat = *halo_exchange_result; + int64 shard_count = target.tile_assignment().dim(dim); + int64 max_left_halo_size = left_halo_size_function.MaxInRange(1, shard_count); + + // Now we determine if we need extra padding after the concat. + // + // The max of halo size or the first shard's explicit left padding. + int64 max_left_halo_or_padding_size = + std::max(std::max(int64{0}, max_left_halo_size), + explicit_left_padding_on_full_shape); + // The calculation that returns the dynamic slice index for a shard on the + // padded concat, which is the difference between + // max_left_halo_or_padding_size and its left halo size. + auto start_offset_on_padded_concat_calculation = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + 0, max_left_halo_or_padding_size, 1)) - + left_halo_size_function; + + // See if we need to pad the concat before dynamic slice. 
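+  // extra_left_padding covers whatever part of
+  // explicit_left_padding_on_full_shape the largest left halo did not already
+  // provide, and extra_right_padding grows the concat so that the largest
+  // dynamic-slice start offset plus shard_size_with_halo still stays in
+  // bounds.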
+ int64 extra_left_padding = + std::max(int64{0}, max_left_halo_or_padding_size - + std::max(int64{0}, max_left_halo_size)); + int64 extra_right_padding = + start_offset_on_padded_concat_calculation.MaxInRange(0, shard_count) + + shard_size_with_halo - concat->shape().dimensions(dim) - + extra_left_padding; + extra_right_padding = std::max(int64{0}, extra_right_padding); + if (extra_left_padding > 0 || extra_right_padding > 0) { + PaddingConfig padding_config; + auto padded_concat_shape = concat->shape(); + for (int64 i = 0; i < base_shape.rank(); ++i) { + auto padding_config_dim = padding_config.add_dimensions(); + padding_config_dim->set_interior_padding(0); + padding_config_dim->set_edge_padding_low(0); + padding_config_dim->set_edge_padding_high(0); + if (i != dim) { + continue; + } + padding_config_dim->set_edge_padding_low(extra_left_padding); + padding_config_dim->set_edge_padding_high(extra_right_padding); + padded_concat_shape.set_dimensions(dim, concat->shape().dimensions(dim) + + extra_left_padding + + extra_right_padding); + } + concat = b->AddInstruction(HloInstruction::CreatePad( + padded_concat_shape, concat, pad_value, padding_config)); + } + + auto valid_slice = concat; + if (shard_size_with_halo != concat->shape().dimensions(dim)) { + // Concat is bigger than the shard shape, so we need a dynamic slice. + CHECK_LT(shard_size_with_halo, concat->shape().dimensions(dim)); + auto slice_shape = concat->shape(); + slice_shape.set_dimensions(dim, shard_size_with_halo); + + if (left_halo_size_function.IsConstant() && + left_halo_size_function.Calculate(0) == + explicit_left_padding_on_full_shape) { + std::vector start_indices(slice_shape.rank(), 0); + std::vector strides(slice_shape.rank(), 1); + valid_slice = b->AddInstruction( + HloInstruction::CreateSlice(slice_shape, concat, start_indices, + slice_shape.dimensions(), strides)); + } else { + auto zero = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + std::vector slice_offsets(base_shape.rank(), zero); + slice_offsets[dim] = start_offset_on_padded_concat_calculation.Calculate( + partition_ordinal, b); + valid_slice = b->AddInstruction(HloInstruction::CreateDynamicSlice( + slice_shape, concat, slice_offsets, slice_shape.dimensions())); + } + } + + if (!mask_invalid_region) { + return valid_slice; + } + + int64 total_right_padding = padded_full_shape_size - + base_shape.dimensions(dim) - + explicit_left_padding_on_full_shape; + // Mask off garbage data due to uneven partition or low/high padding. 
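+  // An element is kept iff its index on the padded full shape (iota along
+  // `dim` plus offset_on_padded_shape) falls in
+  // [explicit_left_padding_on_full_shape,
+  //  explicit_left_padding_on_full_shape + base_shape.dimensions(dim));
+  // everything else is replaced with pad_value. E.g., with a left padding of
+  // 1 and a base dimension of 5, only indices 1 through 5 keep their data.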
+ if (explicit_left_padding_on_full_shape > 0 || total_right_padding > 0) { + auto index_shape = ShapeUtil::ChangeElementType(valid_slice->shape(), S32); + auto iota = b->AddInstruction(HloInstruction::CreateIota(index_shape, dim)); + auto broadcast_start_index_in_padded_shape = + b->AddInstruction(HloInstruction::CreateBroadcast( + index_shape, offset_on_padded_shape, {})); + auto index_in_padded_shape = b->AddInstruction( + HloInstruction::CreateBinary(index_shape, HloOpcode::kAdd, iota, + broadcast_start_index_in_padded_shape)); + auto mask_shape = ShapeUtil::ChangeElementType(index_shape, PRED); + std::vector predicates; + if (explicit_left_padding_on_full_shape > 0) { + auto valid_index_start = + b->AddInstruction(HloInstruction::CreateBroadcast( + index_shape, + b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0( + explicit_left_padding_on_full_shape))), + {})); + predicates.push_back(b->AddInstruction(HloInstruction::CreateCompare( + mask_shape, index_in_padded_shape, valid_index_start, + ComparisonDirection::kGe))); + } + if (total_right_padding > 0) { + auto valid_index_limit = + b->AddInstruction(HloInstruction::CreateBroadcast( + index_shape, + b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0( + base_shape.dimensions(dim) + + explicit_left_padding_on_full_shape))), + {})); + predicates.push_back(b->AddInstruction(HloInstruction::CreateCompare( + mask_shape, index_in_padded_shape, valid_index_limit, + ComparisonDirection::kLt))); + } + CHECK(!predicates.empty()); + auto is_valid = + predicates.size() == 2 + ? b->AddInstruction(HloInstruction::CreateBinary( + mask_shape, HloOpcode::kAnd, predicates[0], predicates[1])) + : predicates[0]; + auto masking_value = b->AddInstruction( + HloInstruction::CreateBroadcast(valid_slice->shape(), pad_value, {})); + valid_slice = b->AddInstruction( + HloInstruction::CreateTernary(valid_slice->shape(), HloOpcode::kSelect, + is_valid, valid_slice, masking_value)); + } + return valid_slice; +} + +} // namespace spmd +} // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h new file mode 100644 index 00000000000..f96b23d7073 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -0,0 +1,229 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_UTIL_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" + +namespace xla { +namespace spmd { + +// Returns true if the given sharding contains any replicated sharding. +bool HasReplicatedSharding(const HloSharding& sharding); + +// Creates zero value instructions of the given shape. +HloInstruction* CreateZero(const Shape& shape, SpmdBuilder* b); + +template +HloInstruction* CreateR0WithType(PrimitiveType type, NativeT value, + SpmdBuilder* b) { + auto literal = LiteralUtil::CreateR0(value) + .ConvertToShape(ShapeUtil::MakeShape(type, {})) + .ValueOrDie(); + return b->AddInstruction(HloInstruction::CreateConstant(std::move(literal))); +} + +// Create a binary add computation of the given type and add to the module. +HloComputation* MakeBinaryAdd(PrimitiveType type, HloModule* module); + +// Returns true if the shape can be evenly partitioned for the given sharding. +// All tile sharded dimensions should be evenly divisible and there should be no +// single-device sharding. Replicate sharding is considered even partition. +bool EvenlyPartitions(const Shape& shape, const HloSharding& sharding); + +// Returns the shard shape of the given shape when it is partitioned for the +// target sharding. +Shape MakePartitionedShape(const Shape& shape, const HloSharding& sharding); + +// Returns the shard shape for a partition without padding due to uneven +// sharding. +Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, + const HloSharding& sharding, + int64 partition_id); + +// Generates the HLO instructions that represent the dimension offsets on any +// device. The size of the returned vector is the rank of the given shape. +std::vector MakePartitionOffsets(const Shape& shape, + const HloSharding& sharding, + HloInstruction* partition_id, + SpmdBuilder* b); + +// Returns the offsets of the partition in the tile assignment. +std::vector MakeTiledPartitionOrdinals( + const HloSharding& sharding, HloInstruction* partition_id, SpmdBuilder* b); + +// Pads hlo to the desired shape using high padding. Either a builder or a +// computation needs to be supplied, but not both. +HloInstruction* PadToShape(HloInstruction* hlo, const Shape& padded_shape, + SpmdBuilder* b, + HloComputation* computation = nullptr); + +// Returns the padded shape when combining all partitions. +Shape GetPaddedShapeForUnevenPartitioning(const Shape& base_shape, + const HloSharding& sharding); + +// Pads the HLO (with base shape) for uneven tiled partition to make it evenly +// partitionable. +HloInstruction* PadBaseShapeBeforeUnevenTiledSharding( + HloInstruction* hlo, const HloSharding& sharding, SpmdBuilder* b); + +// Returns the index of the unique tile dimension. Returns absl::nullopt if the +// given sharding is not tiled or tiled along multiple dimensions. +absl::optional UniqueTiledDim(const HloSharding& sharding); + +// Utilities for symbolic offset calculation and halo exchange. 
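+//
+// Illustrative example of the classes declared below: a left-halo size of
+// (shard_ordinal + 1) elements is expressed as
+// MultiplyAddDivideOffsetCalculation(/*multiplier=*/1, /*offset=*/1,
+// /*divisor=*/1), for which Calculate(3) returns 4 and MaxInRange(0, 4)
+// returns 4. Subtracting two such objects produces an OffsetCalculation that
+// can likewise be evaluated on a constant shard ordinal or emitted as scalar
+// S32 HLO via Calculate(shard_ordinal, b).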
+class OffsetCalculation; + +// Represents a calculation over integers: +// (shard_ordinal * multiplier + offset) / divisor +class MultiplyAddDivideOffsetCalculation { + public: + MultiplyAddDivideOffsetCalculation() + : multiplier_(0), offset_(0), divisor_(1) {} + MultiplyAddDivideOffsetCalculation(int64 multiplier, int64 offset, + int64 divisor); + + OffsetCalculation operator-( + const MultiplyAddDivideOffsetCalculation& other) const; + + bool operator==(const MultiplyAddDivideOffsetCalculation& other) const { + return multiplier_ == other.multiplier_ && offset_ == other.offset_ && + divisor_ == other.divisor_; + } + + bool IsConstant() const { return multiplier_ == 0; } + void Simplify(); + int64 Calculate(int64 shard_ordinal) const; + HloInstruction* Calculate(HloInstruction* shard_ordinal, + SpmdBuilder* b) const; + + // Returns the maximum result for shard ordinals in the range + // [start_ordinal, limit_ordinal). + int64 MaxInRange(int64 start_ordinal, int64 limit_ordinal) const; + + private: + int64 multiplier_; + int64 offset_; + int64 divisor_; +}; + +// Represents a calculation over integers based on results of other calculations +// defined by an opcode. If the opcode is kCopy, it simply wraps an +// MultiplyAddDivideOffsetCalculation. +class OffsetCalculation { + public: + OffsetCalculation() : opcode_(HloOpcode::kCopy), copy_from_() {} + explicit OffsetCalculation( + const MultiplyAddDivideOffsetCalculation& copy_from) + : opcode_(HloOpcode::kCopy), copy_from_(copy_from) {} + OffsetCalculation(const OffsetCalculation& copy_from) { *this = copy_from; } + OffsetCalculation(HloOpcode opcode, + const MultiplyAddDivideOffsetCalculation& lhs, + const MultiplyAddDivideOffsetCalculation& rhs) + : opcode_(opcode), + lhs_(absl::make_unique(lhs)), + rhs_(absl::make_unique(rhs)) {} + OffsetCalculation(HloOpcode opcode, const OffsetCalculation& lhs, + const OffsetCalculation& rhs) + : opcode_(opcode), + lhs_(absl::make_unique(lhs)), + rhs_(absl::make_unique(rhs)) {} + + OffsetCalculation& operator=(const OffsetCalculation& other); + + // Returns whether the calculation returns the same value for all shards. This + // is conservative and could return false even if it is actually constant. + bool IsConstant() const; + + OffsetCalculation operator-(const OffsetCalculation& other) const; + bool operator==(const OffsetCalculation& other) const; + int64 Calculate(int64 shard_ordinal) const; + HloInstruction* Calculate(HloInstruction* shard_ordinal, + SpmdBuilder* b) const; + + // Returns the maximum result for shard ordinals in the range + // [start_ordinal, limit_ordinal). + int64 MaxInRange(int64 start_ordinal, int64 limit_ordinal) const; + + private: + HloOpcode opcode_; + std::unique_ptr lhs_; + std::unique_ptr rhs_; + MultiplyAddDivideOffsetCalculation copy_from_; +}; + +// Performs halo exchange on the given dimension based on the provided +// left/right halo size functions. Returns nullopt if the halo is beyond the +// direct neighbor of the shard. +absl::optional ExchangeHalo( + HloInstruction* hlo, const OffsetCalculation& left_halo_size_function, + const OffsetCalculation& right_halo_size_function, int64 dim, + const HloSharding& target, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b); + +// Exchange halo on all dimensions of the HLO. Returns nullopt if any one of the +// dimensions fails to exchange halo (halo is beyond the neighbor shard). 
+absl::optional ExchangeHalo( + HloInstruction* hlo, + std::vector left_halo_size_functions, + std::vector right_halo_size_functions, + const HloSharding& target, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b); + +// Exchanges halos and performs pad/dynamic-slice on the concatenated data such +// that the result starts with the first needed element on each shard. It also +// masks off invalid data due to padding. +// Arguments: +// hlo: the HLO op before halo exchange +// explicit_left_padding_on_full_shape: the amount of left padding to be added +// explicitly by this function on the base shape before partitioning. Without +// base dilation, this is usually set to the window's padding_low so that the +// sharded op do not need to add padding_low on the window; however, with base +// dilation, this could only be set to a custom size. +// padded_full_shape_size: the size of the padded full shape on the given +// dimension, which includes explicit_left_padding_on_full_shape and required +// right padding to make the shape evenly shardable. +// shard_size_with_halo: the shard size on the dimension after halo exchange. +// If different shards have different sizes, use the maximum size. +// offset_on_padded_shape: the offset HLO (S32) that represents the start of +// each shard on the padded full shape. +// pad_value: the padding value used on the full shape. +absl::optional ExchangeHaloAndGetValidData( + HloInstruction* hlo, const Shape& base_shape, + const OffsetCalculation& left_halo_size_function, + const OffsetCalculation& right_halo_size_function, + int64 explicit_left_padding_on_full_shape, int64 padded_full_shape_size, + int64 shard_size_with_halo, int64 dim, const HloSharding& target, + HloInstruction* offset_on_padded_shape, HloInstruction* pad_value, + HloInstruction* partition_ordinal, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, SpmdBuilder* b, bool mask_invalid_region = true); + +} // namespace spmd +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_SPMD_SPMD_PARTITIONER_UTIL_H_ diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index 2d33184b7d0..1111811d3a3 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -300,7 +300,7 @@ WhileLoopInvariantCodeMotion::TryHoistingInvariantInstructionsFromWhileBody( } StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { - VLOG(2) << "HLO module before WhileLoopConstantSinking:"; + VLOG(2) << "HLO module before WhileLoopInvariantCodeMotion:"; XLA_VLOG_LINES(2, module->ToString()); bool changed = false; @@ -332,10 +332,10 @@ StatusOr WhileLoopInvariantCodeMotion::Run(HloModule* module) { } if (changed) { - VLOG(2) << "HLO module after WhileLoopConstantSinking:"; + VLOG(2) << "HLO module after WhileLoopInvariantCodeMotion:"; XLA_VLOG_LINES(2, module->ToString()); } else { - VLOG(2) << "HLO module unchanged after WhileLoopConstantSinking"; + VLOG(2) << "HLO module unchanged after WhileLoopInvariantCodeMotion"; } return changed; diff --git a/tensorflow/compiler/xla/shape.h b/tensorflow/compiler/xla/shape.h index 2793ddfc1ae..dfaac677724 100644 --- a/tensorflow/compiler/xla/shape.h +++ b/tensorflow/compiler/xla/shape.h @@ -63,6 +63,8 @@ class Shape { // shapes are traversed recursively. 
bool is_static() const; + bool is_dynamic() const { return !is_static(); } + // Returns true if the given dimension is dynamically-sized. bool is_dynamic_dimension(int dimension) const { return dynamic_dimensions_.at(dimension); diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 22ee5a16a30..52cbb8f95ac 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "absl/container/inlined_vector.h" #include "absl/strings/ascii.h" #include "absl/strings/numbers.h" @@ -150,6 +151,19 @@ StatusOr MakeShapeWithLayoutInternal( return equal; } +/* static */ bool ShapeUtil::EqualStructure(const Shape& lhs, + const Shape& rhs) { + bool equal = true; + ForEachSubshape(lhs, [&](const Shape& /*subshape*/, const ShapeIndex& index) { + equal &= IndexIsValid(rhs, index); + }); + ForEachSubshape(rhs, [&](const Shape& /*subshape*/, const ShapeIndex& index) { + equal &= IndexIsValid(lhs, index); + }); + + return equal; +} + /* static */ int64 ShapeUtil::TrueRank(const Shape& shape) { int64 accum = 0; for (int64 dimension : shape.dimensions()) { @@ -261,6 +275,12 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return ValidateShape(*shape); } +/* static */ Shape ShapeUtil::MakeStaticShape(const Shape& original) { + Shape result = original; + result.clear_dynamic_dimensions(); + return result; +} + /* static */ Shape ShapeUtil::MakeTupleShape(absl::Span shapes) { Shape result; result.set_element_type(TUPLE); @@ -626,8 +646,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( if (shape.element_type() == TUPLE) { return ByteSizeOfTupleIndexTable(shape, pointer_size); } else if (shape.IsArray()) { - int64 byte_size = ByteSizeOfElements(shape); - return byte_size; + return ByteSizeOfElements(shape); } else if (shape.element_type() == TOKEN) { return 0; } else if (shape.element_type() == OPAQUE_TYPE) { @@ -1441,6 +1460,19 @@ ShapeUtil::ReshapeLeavesDimensionsUnmodified( return shape; } +/* static */ bool ShapeUtil::DynamicShapeIsCompatible( + const xla::Shape& dynamic_shape, const xla::Shape& bounded_shape) { + if (dynamic_shape.rank() != bounded_shape.rank()) { + return false; + } + for (int64 i = 0; i < dynamic_shape.rank(); ++i) { + if (dynamic_shape.dimensions(i) > bounded_shape.dimensions(i)) { + return false; + } + } + return true; +} + /* static */ Shape ShapeUtil::FilterDimensions( const std::function& p, Shape shape) { CHECK(shape.IsArray()); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 7e05e17865d..dde56587482 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -298,6 +298,16 @@ class ShapeUtil { // As Equal, but allow one of lhs and rhs to be F16 while the other is F32. static bool EqualIgnoringFpPrecision(const Shape& lhs, const Shape& rhs); + // Two shapes have same structure if all subshape indices of lhs are presented + // on rhs and vice versa. 
+ // A nested tuple shape of (F32, (S32[2], F32[2, 2])) is structurally equal to + // (S32, (F32[3], S32[2])) as their structures are both (,(,)) + // + // In contrast, (F32, (F32, F32)) is structurally different from + // ((F32, F32), F32) as the former has structure (,(,)) while the latter has + // ((,),) + static bool EqualStructure(const Shape& lhs, const Shape& rhs); + // Returns the number of dimensions for which the dimension is not (trivially) // 1. e.g., f32[2x1x1] has a true rank of 1D, the other dimensions are just // fluff. Note that zero dimensions are included in the true rank, e.g., @@ -339,6 +349,9 @@ class ShapeUtil { // element type changed to type. static Shape ChangeElementType(const Shape& original, PrimitiveType type); + // Retursn a shape with same dimensions but with all dimensions set to static. + static Shape MakeStaticShape(const Shape& original); + // Creates a tuple shape from a slice of element shapes within the tuple. static Shape MakeTupleShape(absl::Span shapes); @@ -643,12 +656,16 @@ class ShapeUtil { static Shape FilterDimensions(const std::function& p, Shape shape); - // Iterates through all the shape indexes, in minor to major order, starting - // from the base indexes, incrementing by the incr steps, up to count - // (index[i] < base[i] + count[i]), and calls the visitor_function with the - // current index. - // The visitor_function visitor function should return true if it wants to - // continue, or false otherwise. + // Returns true if `dynamic_shape` has dimensions that are less-equal to the + // "bounded_shape". + static bool DynamicShapeIsCompatible(const xla::Shape& dynamic_shape, + const xla::Shape& bounded_shape); + + // Iterates through all the shape indexes, in minor to major order, + // starting from the base indexes, incrementing by the incr steps, up to + // count (index[i] < base[i] + count[i]), and calls the visitor_function + // with the current index. The visitor_function visitor function should + // return true if it wants to continue, or false otherwise. // // visitor_function must be a callable of type // StatusOr(absl::Span) or compatible. diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index c453c5fefa0..c8a242c156a 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1104,6 +1104,7 @@ xla_test( shard_count = 40, tags = [ "no_rocm", + "nozapfhahn", "optonly", ], deps = CONVOLUTION_TEST_DEPS + [ @@ -1500,6 +1501,7 @@ xla_test( srcs = ["select_and_scatter_test.cc"], tags = [ "no_rocm", + "nozapfhahn", "optonly", ], deps = [ diff --git a/tensorflow/compiler/xla/tests/exhaustive_unary_test_f32_or_smaller.cc b/tensorflow/compiler/xla/tests/exhaustive_unary_test_f32_or_smaller.cc index 0ed79fa0ad8..44e1b7b5a6f 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_unary_test_f32_or_smaller.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_unary_test_f32_or_smaller.cc @@ -352,6 +352,17 @@ UNARY_TEST_FLOAT_32_BITS_OR_LESS(Sqrt, { Run(Sqrt, std::sqrt, error_spec_gen); }) +UNARY_TEST_FLOAT_32_BITS_OR_LESS(Cbrt, { + if (platform_ == "Host" || platform_ == "CUDA") { + ErrorSpecGen error_spec_gen = +[](NativeT x) { + return ErrorSpec{0.01, 0.01}; + }; + Run(Cbrt, std::cbrt, error_spec_gen); + } else { + Run(Cbrt, std::cbrt); + } +}) + // TODO(jlebar): Test trig functions over complex inputs. XLA_TEST_P(ExhaustiveF32UnaryTest, Acosh) { // Error inherited from Log, which our implementation of Acosh uses. 
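As a reference for the dynamic-shape plumbing above and below, here is a minimal sketch of how the newly added shape helpers (Shape::is_dynamic, ShapeUtil::MakeStaticShape, ShapeUtil::DynamicShapeIsCompatible, ShapeUtil::EqualStructure) are expected to compose. The wrapper function is hypothetical and exists only for exposition; it uses only APIs introduced in this patch or already present in ShapeUtil.

#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/core/platform/logging.h"

namespace xla {

// Hypothetical helper, for exposition only.
void DynamicShapeHelpersSketch() {
  // f32[<=4]: a rank-1 array whose dimension 0 is dynamic with bound 4.
  Shape dynamic = ShapeUtil::MakeShape(F32, {4});
  dynamic.set_dynamic_dimension(0, true);
  CHECK(dynamic.is_dynamic());

  // MakeStaticShape drops the dynamic bits but keeps the bounds: f32[4].
  Shape bounded = ShapeUtil::MakeStaticShape(dynamic);
  CHECK(bounded.is_static());

  // A runtime shape fits a bounded shape iff every dimension is within the
  // corresponding bound.
  CHECK(ShapeUtil::DynamicShapeIsCompatible(ShapeUtil::MakeShape(F32, {2}),
                                            bounded));
  CHECK(!ShapeUtil::DynamicShapeIsCompatible(ShapeUtil::MakeShape(F32, {5}),
                                             bounded));

  // EqualStructure compares only tuple structure, not element types or
  // dimensions.
  CHECK(ShapeUtil::EqualStructure(
      ShapeUtil::MakeTupleShape(
          {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeShape(S32, {})}),
      ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(S32, {7}),
                                 ShapeUtil::MakeShape(F32, {3, 3})})));
}

}  // namespace xla

DynamicShapeIsCompatible is the check the XRT execute path below relies on when it validates a runtime input buffer against its bounded compile-time shape before reallocating it in UpdateDynamicInputs.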
diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 5a482305513..d575bbb1f3e 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -863,7 +863,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Starts = iteration * 2; auto starts = Mul(iteration, ConstantR0(&builder, 2)); // UpdateSlice. - auto out1 = DynamicUpdateSlice(input, update, starts); + auto out1 = DynamicUpdateSlice(input, update, {starts}); Tuple(&builder, {out0, out1}); body = builder.Build().ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 826876ed9cb..f4b08f454b9 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -272,7 +272,15 @@ message DebugOptions { // True if TraceMe annotations are enabled for XLA:CPU. bool xla_cpu_enable_xprof_traceme = 137; - // Next id: 138 + // It is usually preferable to not fallback to the driver; it can consume more + // memory, or have bugs. + bool xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found = 138; + + // It is usually preferable to not fallback to the driver; it can consume more + // memory, or have bugs. + bool xla_gpu_unsafe_fallback_to_driver_on_ptxas_error = 139; + + // Next id: 140 // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. @@ -325,6 +333,10 @@ message ExecutionOptions { // Used to identify a set of programs that should be launch together. int32 launch_id = 10; + + // Indicates whether to use SPMD (true) or MPMD (false) partitioning when + // num_partitions > 1 and XLA is requested to partition the input program. + bool use_spmd_partitioning = 11; } message GetDeviceHandlesRequest { diff --git a/tensorflow/compiler/xrt/BUILD b/tensorflow/compiler/xrt/BUILD index d1445144b76..332c8ff9a14 100644 --- a/tensorflow/compiler/xrt/BUILD +++ b/tensorflow/compiler/xrt/BUILD @@ -58,6 +58,7 @@ cc_library( "xrt_state.h", "xrt_util.h", ], + visibility = ["//visibility:public"], deps = [ ":xrt_proto_cc", "//tensorflow/compiler/jit:xla_device", diff --git a/tensorflow/compiler/xrt/kernels/BUILD b/tensorflow/compiler/xrt/kernels/BUILD index 309b4f4c85a..494ba29e981 100644 --- a/tensorflow/compiler/xrt/kernels/BUILD +++ b/tensorflow/compiler/xrt/kernels/BUILD @@ -49,6 +49,7 @@ cc_library( deps = [ ":xrt_state_ops", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -59,6 +60,7 @@ cc_library( "//tensorflow/compiler/xla/service:compiler", "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/gpu:gpu_executable_run_options", "//tensorflow/compiler/xrt:xrt_compile_ops_op_lib", "//tensorflow/compiler/xrt:xrt_execute_op_op_lib", "//tensorflow/compiler/xrt:xrt_proto_cc", diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 83b1b4c8a05..ba6e6a093d6 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -51,6 +51,46 @@ namespace tensorflow { namespace { +Status GenerateXlaDeviceAssignment( + const xrt::DeviceAssignment& xrt_device_assignment, int num_replicas, + int num_cores_per_replica, xla::DeviceAssignment* 
device_assignment) { + if (num_cores_per_replica != + xrt_device_assignment.computation_devices_size()) { + return errors::InvalidArgument( + "Device assignment does not have the correct number of " + "computation_devices: num_cores_per_replica=", + num_cores_per_replica, " computation_devices=", + xrt_device_assignment.computation_devices_size()); + } + for (int64 c = 0; c < xrt_device_assignment.computation_devices_size(); ++c) { + const auto& computation_devices = + xrt_device_assignment.computation_devices(c); + if (num_replicas != computation_devices.replica_devices_size()) { + return errors::InvalidArgument( + "Device assignment does not have the correct number of " + "replica_device_ids: num_replicas=", + num_replicas, + " replica_devices=", computation_devices.replica_devices_size()); + } + for (int64 r = 0; r < computation_devices.replica_devices_size(); ++r) { + const auto& coords = computation_devices.replica_devices(r); + if (coords.value_size() != 4) { + return errors::InvalidArgument( + "Device assignment mesh coordinates must have 4 entries, got ", + coords.value_size()); + } + for (int n = 0; n < 3; ++n) { + if (coords.value(n) != 0) { + return errors::InvalidArgument("Mesh coordinate at index ", n, + " must be 0, got ", coords.value(n)); + } + } + (*device_assignment)(r, c) = coords.value(3); + } + } + return Status::OK(); +} + class XRTCompileOp : public OpKernel { public: explicit XRTCompileOp(OpKernelConstruction* ctx); @@ -83,14 +123,13 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx, const xrt::XLAComputation& computation_proto, std::unique_ptr* program) { const xrt::XLAComputationConfig& config = computation_proto.config(); + // Sanity checks for options not yet supported. + int num_cores_per_replica = std::max(config.num_cores_per_replica(), 1); + TF_RET_CHECK(num_cores_per_replica == 1); + TF_RET_CHECK(config.per_core_program_shape_size() == 0); // The default config value is 0; treat it as 1 for convenience. int num_replicas = config.num_replicas() ? config.num_replicas() : 1; - TF_RET_CHECK(num_replicas == 1); - int num_cores_per_replica = - config.num_cores_per_replica() ? config.num_cores_per_replica() : 1; - TF_RET_CHECK(num_cores_per_replica == 1); - TF_RET_CHECK(config.per_core_program_shape_size() == 0); // We are guaranteed that the underlying device object won't be deleted out // from under us, while the ScopedRef is live. 
@@ -119,13 +158,22 @@ Status XRTCompileOp::Compile(OpKernelContext* ctx, argument_layout_ptrs[i] = &argument_layouts[i]; } xla::ExecutableBuildOptions build_options; - build_options.set_device_ordinal(client->default_device_ordinal()); + build_options.set_device_ordinal(device_ref.device_ordinal()); + build_options.set_num_replicas(num_replicas); build_options.set_result_layout(xla::Shape(config.program_shape().result())); build_options.set_device_allocator(device_ref.backend()->memory_allocator()); if (config.has_debug_options()) { *build_options.mutable_debug_options() = BuildXlaDebugOptions(config.debug_options()); } + if (config.has_device_assignment()) { + xla::DeviceAssignment device_assignment(num_replicas, + num_cores_per_replica); + TF_RETURN_IF_ERROR( + GenerateXlaDeviceAssignment(config.device_assignment(), num_replicas, + num_cores_per_replica, &device_assignment)); + build_options.set_device_assignment(device_assignment); + } VLOG(1) << "Building executable"; TF_ASSIGN_OR_RETURN( @@ -158,7 +206,8 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, CompilationCacheKey(computation_proto, &key)); // Process-wide cache of XLA executables. - auto cache_or = GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0); + auto cache_or = XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + ctx, /*max_number_of_entries=*/0); OP_REQUIRES_OK(ctx, cache_or.status()); auto cache = cache_or.ConsumeValueOrDie(); @@ -211,15 +260,11 @@ void XRTReleaseCompilationRefOp::Compute(OpKernelContext* ctx) { VLOG(1) << "XRTReleaseCompilationRefOp::Compute"; auto timed = monitoring::MakeTimed(xrt_metrics::GetReleaseCompilationCell()); - ResourceMgr* rm; - OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm)); - // Process-wide cache of XLA executables. - XRTCompilationCache* cache; - OP_REQUIRES_OK(ctx, rm->Lookup( - rm->default_container(), - kXRTCompilationCacheResourceName, &cache)); - core::ScopedUnref cache_unref(cache); + auto cache_or = XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + ctx, /*max_number_of_entries=*/0); + OP_REQUIRES_OK(ctx, cache_or.status()); + auto cache = cache_or.ConsumeValueOrDie(); const Tensor& keys_tensor = ctx->input(0); auto flat_keys = keys_tensor.flat(); diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index 45c8e1ad59a..2fc599e42df 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -18,7 +18,9 @@ limitations under the License. #include #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_executable_run_options.h" #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -37,7 +39,11 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/monitoring/timed.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/stream_executor_internal.h" @@ -145,31 +151,301 @@ xla::StatusOr GetChainedOpInputs( return std::move(input_buffers); } +// Given a shape, returns a byte array representing the shape metadata of the +// shape. The shape metadata contains dimensions sizes stored as contiguous S32. +std::vector PrepareMetadata(const xla::Shape& shape) { + DCHECK(shape.is_static()); + DCHECK(shape.IsArray()); + // Each dimension size is stored as a S32. + std::vector result(shape.dimensions_size()); + for (int64 i = 0; i < shape.dimensions_size(); ++i) { + result[i] = shape.dimensions(i); + } + return result; +} + +// Given a buffer with dynamic shape, update buffer metadata at the correct +// offset starting from that buffer. +// +// +-----------+ +// |Payload | +// +-----------+ +// | Padding | +// +-----------+ +// |dim_size_0 | (each dim_size is a S32): +// +-----------+ +// |dim_size_1 | +// +-----------+ +// .......... +// +-----------+ +// +// Size of payload = ByteSizeOf(runtime_shape) +// Size of payload + padding = ByteSizeOf(compile_time_shape_static) +// Size of payload + padding + metadata = ByteSizeOf(compile_time_shape) +Status UpdateMetadata(se::Stream* stream, se::DeviceMemory* buffer, + const xla::Shape& compile_time_shape, + const xla::Shape& runtime_shape) { + TF_ASSIGN_OR_RETURN(auto compiler, xla::Compiler::GetForPlatform( + stream->parent()->platform())); + TF_ASSIGN_OR_RETURN( + auto transfer_manager, + xla::TransferManager::GetForPlatform(stream->parent()->platform())); + auto shape_size_fn = compiler->ShapeSizeBytesFunction(); + xla::Shape compile_time_shape_static = + xla::ShapeUtil::MakeStaticShape(compile_time_shape); + uint64 offset = shape_size_fn(compile_time_shape_static); + uint64 metadata_size = shape_size_fn(compile_time_shape) - offset; + auto metadata_buffer = + stream->parent()->GetSubBuffer(buffer, offset, metadata_size); + + auto metadata_literal = std::make_shared( + xla::LiteralUtil::CreateR1(PrepareMetadata(runtime_shape))); + TF_RETURN_IF_ERROR(transfer_manager->TransferArrayToDeviceAsync( + stream, *metadata_literal, metadata_buffer)); + // Retain the literal until the end of the transfer. + stream->ThenDoHostCallback([metadata_literal]() { return Status::OK(); }); + return Status::OK(); +} + +// Given a static input buffer, convert it to dynamic form by expanding it to +// the bounded size and attaching a metadata filled with dimension sizes. +// +// From: +// +--------+ +// |Payload | +// +--------+ +// +// To: +// +// +--------+ +// |Payload | +// +--------+ +// | Padding| +// +--------+ +// |Metadata| +// +--------+ +// +// As we can't expand the size of an existing memory allocation, a reallocation +// is required. A list of new allocations are returned after this function. The +// caller is reponsible for maintaining those allocations. 
+xla::StatusOr> UpdateDynamicInputs( + se::Stream* stream, se::DeviceMemoryAllocator* allocator, + std::vector runtime_inputs, + const std::vector& compile_time_shapes) { + std::vector new_allocations; + TF_RET_CHECK(runtime_inputs.size() == compile_time_shapes.size()); + TF_ASSIGN_OR_RETURN(auto compiler, xla::Compiler::GetForPlatform( + stream->parent()->platform())); + auto shape_size_fn = compiler->ShapeSizeBytesFunction(); + for (int64 i = 0; i < compile_time_shapes.size(); i++) { + const xla::Shape& compile_time_shape = compile_time_shapes[i].shape(); + if (compile_time_shape.is_static()) { + continue; + } + auto* runtime_input = runtime_inputs[i]; + + bool element_modified = false; + TF_RETURN_IF_ERROR(xla::ShapeUtil::ForEachSubshapeWithStatus( + compile_time_shape, + [&](const xla::Shape& compile_time_shape, + const xla::ShapeIndex& index) -> Status { + if (compile_time_shape.IsTuple() || compile_time_shape.is_static()) { + return Status::OK(); + } + const xla::Shape& runtime_shape = xla::ShapeUtil::GetSubshape( + runtime_input->on_device_shape(), index); + TF_RET_CHECK(!runtime_shape.IsTuple()); + TF_RET_CHECK(xla::ShapeUtil::DynamicShapeIsCompatible( + runtime_shape, compile_time_shape)); + se::DeviceMemoryBase* static_input = + runtime_input->buffers().mutable_element(index); + TF_ASSIGN_OR_RETURN( + auto dynamic_input, + allocator->Allocate(stream->parent()->device_ordinal(), + shape_size_fn(compile_time_shape))); + new_allocations.emplace_back(std::move(dynamic_input)); + se::DeviceMemory* dynamic_input_base = + new_allocations.back().ptr(); + // Send the original data to the new location. + stream->ThenMemcpyD2D(dynamic_input_base, *static_input, + static_input->size()); + TF_RETURN_IF_ERROR(UpdateMetadata(stream, dynamic_input_base, + compile_time_shape, runtime_shape)); + // Modify the memory location in the input shape tree to point to the + // new input. + runtime_input->set_buffer(*dynamic_input_base, index); + element_modified = true; + return Status::OK(); + })); + if (element_modified) { + runtime_input->set_shapes(compile_time_shape, compile_time_shape); + // The input location has been modified, need to fix tuple table to + // point to the correct address. + TF_ASSIGN_OR_RETURN( + auto transfer_manager, + xla::TransferManager::GetForPlatform(stream->parent()->platform())); + TF_RETURN_IF_ERROR( + transfer_manager->WriteTupleIndexTablesAsync(stream, *runtime_input)); + } + } + return std::move(new_allocations); +} + +xla::StatusOr ReadMetadataLiteral( + se::Stream* stream, se::DeviceMemoryBase* buffer, + const xla::Shape& buffer_shape, xla::TransferManager* transfer_manager) { + TF_ASSIGN_OR_RETURN(auto compiler, xla::Compiler::GetForPlatform( + stream->parent()->platform())); + auto shape_size_fn = compiler->ShapeSizeBytesFunction(); + xla::Shape buffer_shape_static = + xla::ShapeUtil::MakeStaticShape(buffer_shape); + const int64 offset = shape_size_fn(buffer_shape_static); + int64 metadata_size = shape_size_fn(buffer_shape) - offset; + TF_RET_CHECK(metadata_size != 0); + auto buffer_8 = se::DeviceMemory(*buffer); + auto metadata_buffer = + stream->parent()->GetSubBuffer(&buffer_8, offset, metadata_size); + return transfer_manager->TransferArrayFromDevice( + stream, + xla::ShapeUtil::MakeShape(xla::S32, {buffer_shape.dimensions_size()}), + metadata_buffer); +} + +// For each subshape in the result buffer that's dynamic, read the dynamic +// dimension sizes from the metadata, and update output shapes. The result shape +// is a static and concrete shape. 
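+// For example, if a leaf of the on-device output shape is f32[<=4] and the
+// metadata read back from its buffer is {3}, both the host and device shapes
+// are rewritten to f32[3] and the dynamic-dimension flags are cleared.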
+xla::Status UpdateDynamicOutputs(se::Stream* stream, + xla::ShapedBuffer* shaped_buffer, + xla::Shape* output_host_shape, + xla::Shape* output_device_shape) { + DCHECK(output_device_shape->is_dynamic()); + TF_ASSIGN_OR_RETURN( + auto transfer_manager, + xla::TransferManager::GetForPlatform(stream->parent()->platform())); + TF_RETURN_IF_ERROR(stream->BlockHostUntilDone()); + TF_RETURN_IF_ERROR(shaped_buffer->buffers().ForEachMutableElementWithStatus( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + const xla::Shape& buffer_shape = + xla::ShapeUtil::GetSubshape(*output_device_shape, index); + if (buffer_shape.IsTuple()) { + return Status::OK(); + } + xla::Shape& host_shape = + *xla::ShapeUtil::GetMutableSubshape(output_host_shape, index); + xla::Shape& device_shape = + *xla::ShapeUtil::GetMutableSubshape(output_device_shape, index); + if (device_shape.is_static()) { + return Status::OK(); + } + TF_ASSIGN_OR_RETURN(auto metadata, + ReadMetadataLiteral(stream, buffer, buffer_shape, + transfer_manager)); + // Update shape size from metadata. + for (int64 i = 0; i < metadata.element_count(); ++i) { + host_shape.mutable_dimensions()[i] = metadata.Get({i}); + device_shape.mutable_dimensions()[i] = metadata.Get({i}); + } + return Status::OK(); + })); + output_host_shape->clear_dynamic_dimensions(); + output_device_shape->clear_dynamic_dimensions(); + return Status::OK(); +} + +// Create output tuple from run_result. +xla::StatusOr> CreateOutputTuple( + se::Stream* stream, xla::ScopedShapedBuffer run_result, + xla::Backend* backend, int device_ordinal) { + XRTTupleAllocation* output_tuple; + xla::ShapedBuffer shaped_buffer = run_result.release(); + if (shaped_buffer.on_device_shape().is_dynamic()) { + // Update dynamic shapes from output buffer, and create a XRT tensor with + // dimension sizes read from metadata. + xla::Shape output_host_shape = shaped_buffer.on_host_shape(); + xla::Shape output_device_shape = shaped_buffer.on_device_shape(); + TF_RETURN_IF_ERROR(UpdateDynamicOutputs( + stream, &shaped_buffer, &output_host_shape, &output_device_shape)); + TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer( + shaped_buffer, output_host_shape, output_device_shape, backend, + device_ordinal, &output_tuple)); + } else { + // Fast-path: Don't copy shapes of output buffer. 
+ TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer( + shaped_buffer, backend, device_ordinal, &output_tuple)); + } + return RefPtr(output_tuple); +} + xla::StatusOr> RunExecutable( OpKernelContext* context, XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const InputBuffers& input_buffers, - se::Stream* stream, int rng_seed) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { VLOG(2) << "Executing computation."; xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(device_ref->backend()->memory_allocator()); run_options.set_intra_op_thread_pool(&context->eigen_cpu_device()); run_options.set_rng_seed(rng_seed); + if (config.run_id() != 0) { + run_options.set_run_id(xla::RunId(config.run_id())); + } + if (executable->executable() + ->module_config() + .has_static_device_assignment()) { + run_options.set_device_assignment( + &executable->executable()->module_config().static_device_assignment()); + } + xla::GpuExecutableRunOptions gpu_options; + std::vector gpu_global_ids; + if (config.local_replica_mapping_size() > 0) { + gpu_global_ids.reserve(config.local_replica_mapping_size()); + for (auto& gid : config.local_replica_mapping()) { + gpu_global_ids.emplace_back(xla::GlobalDeviceId(gid)); + } + gpu_options.set_gpu_global_device_ids(gpu_global_ids); + } + std::shared_ptr nccl_factory = GetNcclUniqueIdFactory(); + if (nccl_factory != nullptr) { + auto uid_callback = + [&](const xla::NcclCliqueKey& key) -> xla::StatusOr { + std::vector replicas; + for (auto& device : key.devices()) { + replicas.push_back(device.value()); + } + return nccl_factory->GetUniqueId(replicas); + }; + gpu_options.set_nccl_unique_id_callback(uid_callback); + } + run_options.set_gpu_executable_run_options(&gpu_options); Env* env = Env::Default(); auto start_time = env->NowMicros(); + const std::vector& shape_layouts = + executable->executable() + ->module_config() + .entry_computation_layout() + .parameter_layouts(); + TF_ASSIGN_OR_RETURN(auto new_allocations, + UpdateDynamicInputs(stream, run_options.allocator(), + input_buffers.input_pointers, + shape_layouts)); + auto new_allocations_ptr = + std::make_shared>( + std::move(new_allocations)); TF_ASSIGN_OR_RETURN( xla::ScopedShapedBuffer run_result, executable->Run(input_buffers.input_pointers, run_options)); + // Retain the new allocation for input memory until the end of execution. + stream->ThenDoHostCallback([new_allocations_ptr]() { return Status::OK(); }); + auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; - auto shaped_buffer = run_result.release(); - XRTTupleAllocation* output_tuple; - TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer( - shaped_buffer, device_ref->backend(), device_ref->device_ordinal(), - &output_tuple)); - RefPtr output_tuple_ptr(output_tuple); + TF_ASSIGN_OR_RETURN( + RefPtr output_tuple_ptr, + CreateOutputTuple(stream, std::move(run_result), device_ref->backend(), + device_ref->device_ordinal())); // The ScopedShapedBuffer returned by the executable Run() API, in case of // input/output buffer aliasing, might have holes in it, which need to be @@ -182,7 +458,7 @@ xla::StatusOr> RunExecutable( const xla::HloInputOutputAliasConfig::Alias& alias) -> Status { TF_RET_CHECK(alias.parameter_number < input_buffers.input_tuples.size()); return alias.kind == xla::HloInputOutputAliasConfig::AliasKind::kUserAlias - ? output_tuple->AliasBufferFrom( + ? 
output_tuple_ptr->AliasBufferFrom( *input_buffers.input_tuples[alias.parameter_number], alias.parameter_index, output_index) : Status::OK(); @@ -196,10 +472,11 @@ xla::StatusOr> ExecuteComputation( OpKernelContext* context, XRTMemoryManager* memory_manager, XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const InputBuffers& input_buffers, - se::Stream* stream, int rng_seed) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { auto runfn = [&]() { return RunExecutable(context, device_ref, executable, input_buffers, stream, - rng_seed); + rng_seed, config); }; // We pass zero as requested_free_size as there is no simple way to get the @@ -215,13 +492,15 @@ xla::StatusOr> ExecuteComputation( XRTGenericDeviceAccessor::ScopedRef* device_ref, xla::LocalExecutable* executable, const std::vector& input_coords, bool release_inputs, - se::Stream* stream, int rng_seed) { + se::Stream* stream, int rng_seed, + const xrt::CommonExecutionConfig& config) { XRTMemoryManager::WorkingSet working_set(memory_manager); TF_ASSIGN_OR_RETURN(InputBuffers input_buffers, GetInputBuffers(&working_set, device_ref->backend(), input_coords, release_inputs)); return ExecuteComputation(context, memory_manager.get(), device_ref, - executable, input_buffers, stream, rng_seed); + executable, input_buffers, stream, rng_seed, + config); } // XRTExecuteOp @@ -270,8 +549,9 @@ Status XRTExecuteOp::DoWork(OpKernelContext* context) { bool release_inputs = config_proto.release_input_handles(); bool release_compilation = config_proto.release_compilation_handle(); - TF_ASSIGN_OR_RETURN( - auto cache, GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0)); + TF_ASSIGN_OR_RETURN(auto cache, + XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + context, /*max_number_of_entries=*/0)); // We are guaranteed that the underlying device object won't be deleted out // from under us, while the ScopedRef is live. class XRTGenericDeviceAccessor::ScopedRef device_ref; @@ -302,7 +582,8 @@ Status XRTExecuteOp::DoWork(OpKernelContext* context) { TF_ASSIGN_OR_RETURN( RefPtr output_tuple, ExecuteComputation(context, memory_manager, &device_ref, executable, - input_coords, release_inputs, stream, rng_seed)); + input_coords, release_inputs, stream, rng_seed, + config_proto.common_config())); return CreateExecuteOutput(context, memory_manager.get(), std::move(output_tuple), @@ -351,8 +632,9 @@ Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) { xrt::XRTChainedExecuteConfig config; TF_RET_CHECK(ParseFromTString(execution_config.scalar()(), &config)); - TF_ASSIGN_OR_RETURN( - auto cache, GetOrCreateCompilationCache(rm, /*max_number_of_entries=*/0)); + TF_ASSIGN_OR_RETURN(auto cache, + XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + context, /*max_number_of_entries=*/0)); // We are guaranteed that the underlying device object won't be deleted out // from under us, while the ScopedRef is live. 
class XRTGenericDeviceAccessor::ScopedRef device_ref; @@ -379,7 +661,8 @@ Status XRTExecuteChainedOp::DoWork(OpKernelContext* context) { xla::LocalExecutable* executable = entry->get().get_executable(); return ExecuteComputation(context, memory_manager.get(), &device_ref, - executable, input_buffers, stream, rng_seed); + executable, input_buffers, stream, rng_seed, + config.common_config()); }; return ExecuteChained(context, memory_manager, device_ref.backend(), diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index 243289c8821..fbf9dfd0a17 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -49,6 +49,67 @@ limitations under the License. namespace tensorflow { namespace { +xla::XlaComputation ReturnDynamicR1() { + xla::XlaBuilder builder("ReturnDynamicR1"); + auto p0 = xla::Parameter(&builder, 0, + xla::ShapeUtil::MakeShape(xla::F32, {4}), "P0"); + auto p1 = xla::Parameter(&builder, 1, + xla::ShapeUtil::MakeShape(xla::F32, {4}), "P1"); + auto p2 = xla::Parameter(&builder, 2, xla::ShapeUtil::MakeShape(xla::S32, {}), + "P2"); + auto sum = xla::Add(p0, p1); + auto pad_sum = xla::SetDimensionSize(sum, p2, 0); + return builder.Build(pad_sum).ValueOrDie(); +} + +xla::XlaComputation AcceptDynamicR1() { + xla::XlaBuilder builder("AcceptDynamicR1"); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + auto p0 = xla::Parameter(&builder, 0, dyn_shape, "P0"); + auto p1 = xla::Parameter(&builder, 1, dyn_shape, "P1"); + auto sum = xla::Add(p0, p1); + return builder.Build(sum).ValueOrDie(); +} + +xla::XlaComputation ReturnDynamicR1Tuple() { + xla::XlaBuilder builder("ReturnDynamicR1Tuple"); + auto p0 = xla::Parameter(&builder, 0, + xla::ShapeUtil::MakeShape(xla::F32, {4}), "P0"); + auto p1 = xla::Parameter(&builder, 1, + xla::ShapeUtil::MakeShape(xla::F32, {4}), "P1"); + auto p2 = xla::Parameter(&builder, 2, xla::ShapeUtil::MakeShape(xla::S32, {}), + "P2"); + auto sum = xla::Add(p0, p1); + auto sub = xla::Sub(p0, p1); + auto one = xla::One(&builder, xla::S32); + auto pad_sum = xla::SetDimensionSize(sum, p2, 0); + auto pad_sub = xla::SetDimensionSize(sub, p2 + one, 0); + auto tuple = xla::Tuple(&builder, {pad_sum, sum, pad_sub}); + return builder.Build(tuple, /*remove_dynamic_dimensions=*/true).ValueOrDie(); +} + +xla::XlaComputation AcceptDynamicR1Tuple() { + xla::XlaBuilder builder("AcceptDynamicR1"); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + xla::Shape tuple_shape = + xla::ShapeUtil::MakeTupleShape({dyn_shape, dyn_shape}); + xla::Shape nest_tuple_shape = + xla::ShapeUtil::MakeTupleShape({dyn_shape, dyn_shape}); + auto p = xla::Parameter(&builder, 0, tuple_shape, "P0"); + auto p0 = xla::GetTupleElement(p, 0); + auto p1 = xla::GetTupleElement(p, 1); + auto sum = xla::Add(p0, p1); + return builder.Build(sum).ValueOrDie(); +} + +template +xla::LiteralProto CreateR0(T v) { + auto array = xla::LiteralUtil::CreateR0(v); + return array.ToProto(); +} + class XrtClientSession : public ClientSession { public: explicit XrtClientSession(const Scope& scope) : ClientSession(scope) { @@ -61,6 +122,11 @@ class XrtClientSession : public ClientSession { string* xla_test_device_ptr; // initial value set in main() string* xla_platform_ptr; // initial value set in main() +bool SupportDynamicShapes() { + // TODO(jackcao): Support dynamic shapes on XLA GPU. 
+ return *xla_test_device_ptr != "XLA_GPU"; +} + string DeviceFromFlag() { string xla_test_device = *xla_test_device_ptr; return absl::StrCat("/device:", xla_test_device, ":0"); @@ -1035,6 +1101,239 @@ TEST(RawApiTest, CompileAndExecute) { EXPECT_EQ(program_shape.parameters_size(), 2); } +TEST(RawApiTest, DynamicR1Test) { + if (!SupportDynamicShapes()) { + return; + } + xrt::XLAAllocation p0; + *p0.mutable_value() = FloatVector({1.0f, 2.0f, 0.5f, -1.0f}); + xrt::XLAAllocation p1; + *p1.mutable_value() = FloatVector({1.0f, -1.0f, 2.5f, 1.17f}); + xrt::XLAAllocation p2; + *p2.mutable_value() = CreateR0(2); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = + xla::ShapeUtil::MakeShape(xla::F32, {4}).ToProto(); + *shapes->add_parameters() = + xla::ShapeUtil::MakeShape(xla::F32, {4}).ToProto(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S32, {}).ToProto(); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + *shapes->mutable_result() = dyn_shape.ToProto(); + StoreComputationSnapshot(ReturnDynamicR1(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + Scope cpu_root = root.WithDevice("/device:CPU:0"); + auto e_config = ops::Const(cpu_root, e.SerializeAsString()); + auto computation = ops::Const(cpu_root, c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = ops::Const(cpu_root, p0.SerializeAsString()); + auto p0_handle = ops::XRTAllocate(root, p0_value); + auto p1_value = ops::Const(cpu_root, p1.SerializeAsString()); + auto p1_handle = ops::XRTAllocate(root, p1_value); + auto p2_value = ops::Const(cpu_root, p2.SerializeAsString()); + auto p2_handle = ops::XRTAllocate(root, p2_value); + auto result = ops::XRTExecute( + root, c_handle.handle, e_config, + {Output(p0_handle), Output(p1_handle), Output(p2_handle)}); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + XrtClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + auto expected = xla::LiteralUtil::CreateR1({2.0f, 1.0f}); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); +} + +TEST(RawApiTest, DynamicR1TupleTest) { + if (!SupportDynamicShapes()) { + return; + } + xrt::XLAAllocation p0; + *p0.mutable_value() = FloatVector({1.0f, 2.0f, 0.5f, -1.0f}); + xrt::XLAAllocation p1; + *p1.mutable_value() = FloatVector({1.0f, -1.0f, -0.5f, 1.0f}); + xrt::XLAAllocation p2; + *p2.mutable_value() = CreateR0(2); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = + xla::ShapeUtil::MakeShape(xla::F32, {4}).ToProto(); + *shapes->add_parameters() = + xla::ShapeUtil::MakeShape(xla::F32, {4}).ToProto(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S32, {}).ToProto(); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + *shapes->mutable_result() = + xla::ShapeUtil::MakeTupleShape( + {dyn_shape, xla::ShapeUtil::MakeShape(xla::F32, {4}), dyn_shape}) + .ToProto(); + 
StoreComputationSnapshot(ReturnDynamicR1Tuple(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + Scope cpu_root = root.WithDevice("/device:CPU:0"); + auto e_config = ops::Const(cpu_root, e.SerializeAsString()); + auto computation = ops::Const(cpu_root, c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = ops::Const(cpu_root, p0.SerializeAsString()); + auto p0_handle = ops::XRTAllocate(root, p0_value); + auto p1_value = ops::Const(cpu_root, p1.SerializeAsString()); + auto p1_handle = ops::XRTAllocate(root, p1_value); + auto p2_value = ops::Const(cpu_root, p2.SerializeAsString()); + auto p2_handle = ops::XRTAllocate(root, p2_value); + auto result = ops::XRTExecute( + root, c_handle.handle, e_config, + {Output(p0_handle), Output(p1_handle), Output(p2_handle)}); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + XrtClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + + auto expected0 = xla::LiteralUtil::CreateR1({2.0f, 1.0f}); + auto expected1 = xla::LiteralUtil::CreateR1({2.0f, 1.0f, 0.0f, 0.0f}); + auto expected2 = xla::LiteralUtil::CreateR1({0.0f, 3.0f, 1.0f}); + auto expected = + xla::LiteralUtil::MakeTuple({&expected0, &expected1, &expected2}); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); +} + +TEST(RawApiTest, AcceptDynamicR1TupleTest) { + if (!SupportDynamicShapes()) { + return; + } + xrt::XLAAllocation p0; + *p0.mutable_value() = FloatVector({1.0f, 2.0f, 0.5f}); + xrt::XLAAllocation p1; + *p1.mutable_value() = FloatVector({1.0f, -1.0f, -0.5f}); + + xrt::XLATupleNode tuple_desc; + auto subdesc_10 = tuple_desc.add_tuples(); + auto subdesc_11 = tuple_desc.add_tuples(); + subdesc_10->set_input_index(0); + subdesc_10->set_release_input_handle(true); + subdesc_11->set_input_index(1); + subdesc_11->set_release_input_handle(true); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + xla::Shape dyn_input_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_input_shape.set_dynamic_dimension(0, true); + xla::Shape dyn_tuple_shape = + xla::ShapeUtil::MakeTupleShape({dyn_input_shape, dyn_input_shape}); + *shapes->add_parameters() = dyn_tuple_shape.ToProto(); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + *shapes->mutable_result() = dyn_shape.ToProto(); + StoreComputationSnapshot(AcceptDynamicR1Tuple(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + Scope cpu_root = root.WithDevice("/device:CPU:0"); + auto e_config = ops::Const(cpu_root, e.SerializeAsString()); + auto computation = ops::Const(cpu_root, c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = ops::Const(cpu_root, p0.SerializeAsString()); + auto p0_handle = ops::XRTAllocate(root, p0_value); + auto p1_value = ops::Const(cpu_root, p1.SerializeAsString()); + auto p1_handle = ops::XRTAllocate(root, p1_value); + + auto tuple_0 = ops::Const(root.WithDevice("/device:CPU:0"), + 
tuple_desc.SerializeAsString()); + auto t0_handle = ops::XRTMakeTuple( + root, tuple_0, + {static_cast(p0_handle), static_cast(p1_handle)}); + auto result = ops::XRTExecute(root, c_handle.handle, e_config, + {static_cast(t0_handle)}); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + XrtClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + + auto expected = xla::LiteralUtil::CreateR1({2.0f, 1.0f, 0.0f}); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); +} + +TEST(RawApiTest, AcceptDynamicR1Test) { + if (!SupportDynamicShapes()) { + return; + } + xrt::XLAAllocation p0; + *p0.mutable_value() = FloatVector({1.0f, 2.0f, 0.5f}); + xrt::XLAAllocation p1; + *p1.mutable_value() = FloatVector({1.0f, -1.0f, -0.5f}); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + xla::Shape dyn_input_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_input_shape.set_dynamic_dimension(0, true); + *shapes->add_parameters() = dyn_input_shape.ToProto(); + *shapes->add_parameters() = dyn_input_shape.ToProto(); + xla::Shape dyn_shape = xla::ShapeUtil::MakeShape(xla::F32, {4}); + dyn_shape.set_dynamic_dimension(0, true); + *shapes->mutable_result() = dyn_shape.ToProto(); + StoreComputationSnapshot(AcceptDynamicR1(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + Scope cpu_root = root.WithDevice("/device:CPU:0"); + auto e_config = ops::Const(cpu_root, e.SerializeAsString()); + auto computation = ops::Const(cpu_root, c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = ops::Const(cpu_root, p0.SerializeAsString()); + auto allocate_op_0 = ops::XRTAllocate(root, p0_value); + auto p1_value = ops::Const(cpu_root, p1.SerializeAsString()); + auto allocate_op_1 = ops::XRTAllocate(root, p1_value); + auto result = ops::XRTExecute(root, c_handle.handle, e_config, + {Output(allocate_op_0), Output(allocate_op_1)}); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + XrtClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + + auto expected = xla::LiteralUtil::CreateR1({2.0f, 1.0f, 0.0f}); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); +} + TEST(RawApiTest, CompileAndExecuteWithArgumentVector) { xrt::XLAAllocation p0; *p0.mutable_value() = FloatVector({1.0f, 2.0f}); diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto index 47b7cda2760..9a351732c4b 100644 --- a/tensorflow/compiler/xrt/xrt.proto +++ b/tensorflow/compiler/xrt/xrt.proto @@ -111,6 +111,17 @@ message XLATupleNode { repeated XLATupleNode tuples = 3; } +message CommonExecutionConfig { + // The replica index this execute is driving. + int32 replica_id = 1; + // Mapping local device ordinals to global replica IDs. 
+ // local_replica_mapping[LOCAL_DEVICE_ORDINAL] = GLOBAL_REPLICA_ID + repeated int32 local_replica_mapping = 2; + // The execution run ID used to correlate different XRT execute operations + // happening in parallel from different threads. + int64 run_id = 3; +} + // Options for an XLA execution. message XRTExecutionConfig { // Local device to run on. This is present because the execute Op @@ -133,6 +144,9 @@ message XRTExecutionConfig { // a single tuple allocation the execution will return a vector of // allocations, one for each of the first-level elements of the result tuple. bool return_exploded_tuple = 7; + reserved 8; + // The common configuration for XRT execute operations. + CommonExecutionConfig common_config = 9; } message XRTChainedExecuteConfig { @@ -143,6 +157,9 @@ message XRTChainedExecuteConfig { // Optional key to disambiguate between executions. This is only needed if // multiple host send/recvs may be outstanding concurrently with executions. string execution_instance_key = 3; + reserved 4; + // The common configuration for XRT execute operations. + CommonExecutionConfig common_config = 5; } // A single chained execute operation. An operation can either be a device data diff --git a/tensorflow/compiler/xrt/xrt_device.cc b/tensorflow/compiler/xrt/xrt_device.cc index 1b5557d556d..46954572c5d 100644 --- a/tensorflow/compiler/xrt/xrt_device.cc +++ b/tensorflow/compiler/xrt/xrt_device.cc @@ -17,19 +17,56 @@ limitations under the License. #include "tensorflow/compiler/xrt/xrt_device.h" +#include + #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { +namespace { + +class ResourceMgrArena { + public: + static ResourceMgrArena* Get() { + static ResourceMgrArena* arena = new ResourceMgrArena(); + return arena; + } + + ResourceMgr* GetResourceMgr(const std::string& platform_name) { + mutex_lock lock(mutex_); + auto it = resource_managers_.find(platform_name); + if (it == resource_managers_.end()) { + it = resource_managers_.emplace(platform_name, new ResourceMgr()).first; + } + return it->second; + } + + private: + mutex mutex_; + std::map<std::string, ResourceMgr*> resource_managers_; +}; + +} // namespace /*static*/ Status XRTGenericDeviceAccessor::GetResourceManager( OpKernelContext* ctx, ResourceMgr** rm) { - *rm = ctx->resource_manager(); + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata)); + *rm = ResourceMgrArena::Get()->GetResourceMgr(metadata->platform()->Name()); return Status::OK(); } +/* static */ xla::StatusOr<RefPtr<XRTCompilationCache>> +XRTGenericDeviceAccessor::GetOrCreateCompilationCache( + OpKernelContext* ctx, int64 max_number_of_entries) { + ResourceMgr* rm; + TF_RETURN_IF_ERROR(GetResourceManager(ctx, &rm)); + return tensorflow::GetOrCreateCompilationCache(rm, max_number_of_entries); +} + /*static*/ Status XRTGenericDeviceAccessor::InitScopedRef( OpKernelContext* ctx, int device_ordinal, ScopedRef* scoped_ref) { const XlaDevice::Metadata* metadata; diff --git a/tensorflow/compiler/xrt/xrt_device.h b/tensorflow/compiler/xrt/xrt_device.h index 5ebee7641f0..02fab315830 100644 --- a/tensorflow/compiler/xrt/xrt_device.h +++ b/tensorflow/compiler/xrt/xrt_device.h @@ -19,6 +19,7 @@ limitations under the License. 
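The uid_callback installed in RunExecutable above resolves NCCL clique keys through the NcclUniqueIdFactory hook declared in xrt_util.h below (SetNcclUniqueIdFactory / GetNcclUniqueIdFactory). A single-process factory could look like the following sketch; it assumes GetUniqueId takes absl::Span<const int64> (the template arguments are not visible in this copy of the diff), that an nccl.h header is available, and that generating the id locally is acceptable, whereas a real multi-host deployment would generate one id per clique and distribute it out of band. The class name and structure are illustrative, not part of the patch:

```cpp
#include <map>
#include <memory>
#include <string>

#include "absl/strings/str_join.h"
#include "absl/types/span.h"
#include "nccl.h"  // include path is an assumption; GPU builds only
#include "tensorflow/compiler/xrt/xrt_util.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

// Sketch: one locally generated NCCL unique id per clique, keyed by the
// participating replica ids. Multi-host jobs would instead broadcast the id
// generated by a single process to all the others.
class LocalNcclUniqueIdFactory : public tensorflow::NcclUniqueIdFactory {
 public:
  std::string GetUniqueId(absl::Span<const tensorflow::int64> replicas) override {
    const std::string key = absl::StrJoin(replicas, ",");
    tensorflow::mutex_lock lock(mu_);
    auto it = ids_.find(key);
    if (it == ids_.end()) {
      ncclUniqueId id;
      ncclGetUniqueId(&id);  // opaque NCCL_UNIQUE_ID_BYTES-sized blob
      it = ids_.emplace(key, std::string(id.internal, NCCL_UNIQUE_ID_BYTES)).first;
    }
    return it->second;
  }

 private:
  tensorflow::mutex mu_;
  std::map<std::string, std::string> ids_;
};

// Installed once at startup, before any XRTExecute op runs:
//   tensorflow::SetNcclUniqueIdFactory(std::make_shared<LocalNcclUniqueIdFactory>());
```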
#define TENSORFLOW_COMPILER_XRT_XRT_DEVICE_H_ #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xrt/xrt_compilation_cache.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -31,6 +32,9 @@ class XRTGenericDeviceAccessor { public: static Status GetResourceManager(OpKernelContext* ctx, ResourceMgr** rm); + static xla::StatusOr<RefPtr<XRTCompilationCache>> GetOrCreateCompilationCache( + OpKernelContext* ctx, int64 max_number_of_entries); + // We use a ScopedRef pattern here even though it's not strictly necessary, // just so that templated uses of this and the TPU accessor class will be as // similar as possible. diff --git a/tensorflow/compiler/xrt/xrt_util.cc b/tensorflow/compiler/xrt/xrt_util.cc index 4d19d4b1226..b8a0afc92c5 100644 --- a/tensorflow/compiler/xrt/xrt_util.cc +++ b/tensorflow/compiler/xrt/xrt_util.cc @@ -21,10 +21,14 @@ limitations under the License. #include "tensorflow/compiler/xla/debug_options_flags.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { namespace { +mutex nccl_factory_mutex(LINKER_INITIALIZED); +std::shared_ptr<NcclUniqueIdFactory>* nccl_factory; + // The ScopedHandles data structure is used in the ExecuteChained() API and its // task is to track tuple allocation registrations. It is used both to track // intermediate results of a chained computation, or its final results. Anything @@ -162,6 +166,19 @@ Status PopulateOpWorkingSet(xla::Backend* backend, } // namespace +void SetNcclUniqueIdFactory(std::shared_ptr<NcclUniqueIdFactory> factory) { + mutex_lock lock(nccl_factory_mutex); + if (nccl_factory == nullptr) { + nccl_factory = new std::shared_ptr<NcclUniqueIdFactory>(); + } + *nccl_factory = std::move(factory); +} + +std::shared_ptr<NcclUniqueIdFactory> GetNcclUniqueIdFactory() { + mutex_lock lock(nccl_factory_mutex); + return nccl_factory != nullptr ? *nccl_factory : nullptr; +} + xla::DebugOptions BuildXlaDebugOptions(const xla::DebugOptions& ref_options) { static const bool options_passthrough = DebugOptionsPassThroughEnabled(); if (options_passthrough) { diff --git a/tensorflow/compiler/xrt/xrt_util.h b/tensorflow/compiler/xrt/xrt_util.h index 32244a63081..cc1480fdb00 100644 --- a/tensorflow/compiler/xrt/xrt_util.h +++ b/tensorflow/compiler/xrt/xrt_util.h @@ -18,6 +18,10 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XRT_XRT_UTIL_H_ #define TENSORFLOW_COMPILER_XRT_XRT_UTIL_H_ +#include +#include +#include + #include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -31,6 +35,19 @@ limitations under the License. namespace tensorflow { +// Factory class which creates NCCL unique IDs based on the replicas +// participating in a given communication. This is only used for GPU backends. +struct NcclUniqueIdFactory { + virtual ~NcclUniqueIdFactory() {} + + // Generates the NCCL unique ID for the given set of replica IDs. 
+ virtual std::string GetUniqueId(absl::Span replicas) = 0; +}; + +void SetNcclUniqueIdFactory(std::shared_ptr factory); + +std::shared_ptr GetNcclUniqueIdFactory(); + struct InputCoords { explicit InputCoords(int64 handle) : handle(handle) {} InputCoords(int64 handle, xla::ShapeIndex index) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 19097fb8922..6b4874a8393 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -83,7 +83,6 @@ load( "tf_gen_op_libs", "tf_genrule_cmd_append_to_srcs", "tf_opts_nortti_if_lite_protos", - "tf_opts_nortti_if_mobile", "tf_portable_full_lite_protos", "transitive_hdrs", ) @@ -100,28 +99,23 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_cc_tests_gpu") -# buildifier: disable=same-origin-load -# Placeholder: load("//tensorflow:tensorflow.bzl", "tf_portable_proto_lib") - # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_monitoring_deps") # For platform specific build config load( "//tensorflow/core/platform:build_config.bzl", - "tf_additional_all_protos", "tf_additional_lib_deps", "tf_additional_test_deps", "tf_jspb_proto_library", "tf_kernel_tests_linkstatic", "tf_lib_proto_parsing_deps", "tf_portable_deps_no_runtime", + "tf_portable_proto_lib", "tf_proto_library", - "tf_proto_library_cc", "tf_protos_all_impl", "tf_protos_grappler_impl", "tf_protos_profiler_impl", - "tf_pyclif_proto_library", ) load( "//tensorflow/core/platform:rules_cc.bzl", @@ -184,18 +178,18 @@ package_group(name = "friends") # filegroup; e.g. ones with individual proto_library targets. # LINT.IfChange COMMON_PROTO_SRCS = [ - "protobuf/bfc_memory_map.proto", - "protobuf/config.proto", - "protobuf/cluster.proto", - "protobuf/debug.proto", - "protobuf/device_filters.proto", - "protobuf/device_properties.proto", - "protobuf/graph_debug_info.proto", - "protobuf/queue_runner.proto", - "protobuf/rewriter_config.proto", - "protobuf/tensor_bundle.proto", - "protobuf/saver.proto", - "protobuf/verifier_config.proto", + "//tensorflow/core/protobuf:bfc_memory_map.proto", + "//tensorflow/core/protobuf:config.proto", + "//tensorflow/core/protobuf:cluster.proto", + "//tensorflow/core/protobuf:debug.proto", + "//tensorflow/core/protobuf:device_filters.proto", + "//tensorflow/core/protobuf:device_properties.proto", + "//tensorflow/core/protobuf:graph_debug_info.proto", + "//tensorflow/core/protobuf:queue_runner.proto", + "//tensorflow/core/protobuf:rewriter_config.proto", + "//tensorflow/core/protobuf:tensor_bundle.proto", + "//tensorflow/core/protobuf:saver.proto", + "//tensorflow/core/protobuf:verifier_config.proto", ] EXAMPLE_PROTO_SRCS = [ @@ -242,7 +236,7 @@ PROFILER_PROTO_SRCS = [ ] ERROR_CODES_PROTO_SRCS = [ - "protobuf/error_codes.proto", + "//tensorflow/core/protobuf:error_codes.proto", "//tensorflow/core/lib/core:error_codes.proto", ] # LINT.ThenChange(//tensorflow/core/portable_proto_config.asciipb) @@ -255,11 +249,13 @@ tf_proto_library( cc_api_version = 2, make_default_target_header_only = True, protodeps = [ - ":core_protos", - ":error_codes_proto_impl", "//tensorflow/core/example:protos_all", "//tensorflow/core/framework:protos_all", "//tensorflow/core/lib/core:error_codes_proto", + "//tensorflow/core/profiler/protobuf:xplane_proto", + "//tensorflow/core/profiler:profiler_options_proto", + "//tensorflow/core/protobuf:error_codes_proto_impl", + "//tensorflow/core/protobuf:for_core_protos", "//tensorflow/core/util:protos_all", 
"//tensorflow/core/util:test_log_proto_impl", ], @@ -619,6 +615,7 @@ tf_gen_op_libs( "clustering_ops", "collective_ops", "control_flow_ops", + "count_ops", "ctc_ops", "data_flow_ops", "dataset_ops", @@ -847,6 +844,7 @@ cc_library( ":clustering_ops_op_lib", ":collective_ops_op_lib", ":control_flow_ops_op_lib", + ":count_ops_op_lib", ":ctc_ops_op_lib", ":cudnn_rnn_ops_op_lib", ":data_flow_ops_op_lib", @@ -889,23 +887,29 @@ cc_library( ":state_ops_op_lib", ":stateless_random_ops_op_lib", ":string_ops_op_lib", - ":tpu_configuration_ops_op_lib", - ":tpu_cross_replica_ops_op_lib", - ":tpu_embedding_ops_op_lib", - ":tpu_embedding_load_retrieve_ops_op_lib", - ":tpu_functional_ops_op_lib", - ":tpu_heartbeat_ops_op_lib", - ":tpu_host_compute_ops_op_lib", - ":tpu_infeed_ops_op_lib", - ":tpu_outfeed_ops_op_lib", - ":tpu_ordinal_selector_ops_op_lib", - ":tpu_replication_ops_op_lib", ":training_ops_op_lib", ":user_ops_op_lib", ":word2vec_ops", "//tensorflow/c/kernels:bitcast_op_lib", "//tensorflow/compiler/mlir/tensorflow:mlir_passthrough_op", - ] + if_mkl([ + ] + if_chromiumos( + [], + # Non-tpu platforms don't need tpu dependency. It would be best to guard + # them by if_tpu. But there is no such flag yet. + [ + ":tpu_configuration_ops_op_lib", + ":tpu_cross_replica_ops_op_lib", + ":tpu_embedding_ops_op_lib", + ":tpu_embedding_load_retrieve_ops_op_lib", + ":tpu_functional_ops_op_lib", + ":tpu_heartbeat_ops_op_lib", + ":tpu_host_compute_ops_op_lib", + ":tpu_infeed_ops_op_lib", + ":tpu_outfeed_ops_op_lib", + ":tpu_ordinal_selector_ops_op_lib", + ":tpu_replication_ops_op_lib", + ], + ) + if_mkl([ ":mkl_array_ops_op_lib", ":mkl_nn_ops_op_lib", ]) + if_tensorrt([ @@ -1006,6 +1010,7 @@ cc_library( "//tensorflow/core/kernels:collective_ops", "//tensorflow/core/kernels:constant_op", "//tensorflow/core/kernels:control_flow_ops", + "//tensorflow/core/kernels:count_ops", "//tensorflow/core/kernels:ctc_ops", "//tensorflow/core/kernels:data_flow", "//tensorflow/core/kernels:decode_proto_op", @@ -1265,7 +1270,7 @@ filegroup( "//tensorflow/core/platform:mobile_srcs_no_runtime", "//tensorflow/core/public:mobile_srcs_no_runtime", "//tensorflow/core/util:mobile_srcs_no_runtime", - "//tensorflow/core/util/ctc:android_srcs", + "//tensorflow/core/util/ctc:mobile_srcs", ] + glob( [ "client/**/*.cc", @@ -1295,12 +1300,12 @@ filegroup( "//tensorflow/core/common_runtime/eager:srcs", "//tensorflow/core/framework:mobile_srcs_only_runtime", "//tensorflow/core/graph:mobile_srcs_only_runtime", - "//tensorflow/core/kernels:android_srcs", + "//tensorflow/core/kernels:mobile_srcs", "//tensorflow/core/lib/io:mobile_srcs_only_runtime", "//tensorflow/core/profiler:mobile_srcs", "//tensorflow/core/public:mobile_srcs_only_runtime", "//tensorflow/core/util/sparse:mobile_srcs_only_runtime", - "//tensorflow/core/util/tensor_bundle:android_srcs", + "//tensorflow/core/util/tensor_bundle:mobile_srcs", "//tensorflow/core/util:mobile_srcs_only_runtime", # Sources for which we already have granular targets. 
@@ -1365,10 +1370,7 @@ cc_library( name = "portable_tensorflow_lib_lite", srcs = if_mobile([":mobile_srcs"]), copts = tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_lite_protos() + if_ios(["-Os"]), - defines = ["SUPPORT_SELECTIVE_REGISTRATION"] + tf_portable_full_lite_protos( - full = [], - lite = ["TENSORFLOW_LITE_PROTOS"], - ) + if_chromiumos(["IS_MOBILE_PLATFORM"]) + tf_defines_nortti_if_lite_protos(), + defines = ["SUPPORT_SELECTIVE_REGISTRATION"] + if_chromiumos(["IS_MOBILE_PLATFORM"]) + tf_defines_nortti_if_lite_protos(), linkopts = if_android(["-lz"]) + if_ios(["-lz"]), tags = [ "manual", @@ -1376,10 +1378,9 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":protos_all_cc_impl", "//tensorflow/core/util:stats_calculator_portable", "//tensorflow/core:mobile_additional_lib_deps", - ] + tf_portable_deps_no_runtime(), + ] + tf_portable_proto_lib() + tf_portable_deps_no_runtime(), alwayslink = 1, ) @@ -1411,54 +1412,12 @@ cc_library( ], ) -# Native library support for iOS applications. -# -# bazel build --config=ios_x86_64 \ -# :ios_tensorflow_lib -cc_library( - name = "ios_tensorflow_lib", - srcs = if_ios([ - ":portable_op_registrations_and_gradients", - "//tensorflow/core/kernels:android_core_ops", - "//tensorflow/core/kernels:android_extended_ops", - ]), - copts = tf_copts() + tf_opts_nortti_if_lite_protos() + ["-Os"], - visibility = ["//visibility:public"], - deps = [ - ":portable_tensorflow_lib_lite", - ":protos_all_cc_impl", - "//third_party/eigen3", - "//third_party/fft2d:fft2d_headers", - "@com_google_protobuf//:protobuf", - "@fft2d", - "@gemmlowp", - ], - alwayslink = 1, -) - alias( name = "ios_tensorflow_lib_lite", actual = ":portable_tensorflow_lib_lite", visibility = ["//visibility:public"], ) -cc_library( - name = "ios_tensorflow_test_lib", - testonly = 1, - srcs = if_ios([":android_test_srcs"]), - copts = tf_copts() + ["-Os"], - tags = [ - "manual", - "notap", - ], - visibility = ["//visibility:public"], - deps = [ - ":ios_tensorflow_lib", - "//tensorflow/core/platform/default/build_config:gtest", - "//third_party/eigen3", - ], -) - # Full TensorFlow library with operator support. Use this unless reducing # binary size (by packaging a reduced operator set) is a concern. 
alias( @@ -1467,10 +1426,16 @@ alias( visibility = ["//visibility:public"], ) +alias( + name = "ios_tensorflow_lib", + actual = ":portable_tensorflow_lib", + visibility = ["//visibility:public"], +) + cc_library( name = "portable_tensorflow_lib", srcs = if_mobile([":portable_op_registrations_and_gradients"]), - copts = tf_copts() + tf_opts_nortti_if_lite_protos(), + copts = tf_copts() + tf_opts_nortti_if_lite_protos() + if_ios(["-Os"]), features = tf_features_nomodules_if_mobile(), tags = [ "manual", @@ -1553,6 +1518,12 @@ alias( visibility = ["//visibility:public"], ) +alias( + name = "ios_tensorflow_test_lib", + actual = ":portable_tensorflow_test_lib", + visibility = ["//visibility:public"], +) + cc_library( name = "portable_tensorflow_test_lib", testonly = 1, @@ -1563,7 +1534,7 @@ cc_library( "//tensorflow/core/framework:android_test_hdrs", "//tensorflow/core/util:android_test_hdrs", ], - copts = tf_copts(android_optimization_level_override = None), + copts = tf_copts(android_optimization_level_override = None) + if_ios(["-Os"]), features = tf_features_nomodules_if_mobile() + tf_opts_nortti_if_lite_protos(), tags = [ "manual", @@ -1631,20 +1602,13 @@ alias( [ alias( name = "protobuf_%s_pyclif%s" % (proto_name, target_suffix), - actual = ":protobuf/%s_pyclif%s" % (proto_name, target_suffix), + actual = "//tensorflow/core/protobuf:%s_pyclif%s" % (proto_name, target_suffix), visibility = ["//visibility:public"], ) for target_suffix in [ "", "_pb2", ] - ] + [ - tf_pyclif_proto_library( - name = "protobuf/%s_pyclif" % proto_name, - proto_lib = ":protos_all", - proto_srcfile = "protobuf/%s.proto" % proto_name, - visibility = ["//visibility:public"], - ), ] for proto_name in [ "config", @@ -1658,77 +1622,74 @@ alias( # ----------------------------------------------------------------------------- # Internal targets -tf_proto_library( +alias( name = "autotuning_proto", - srcs = ["protobuf/autotuning.proto"], - cc_api_version = 2, - make_default_target_header_only = True, + actual = "//tensorflow/core/protobuf:autotuning_proto", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library( +alias( + name = "autotuning_proto_cc", + actual = "//tensorflow/core/protobuf:autotuning_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( name = "conv_autotuning_proto", - srcs = ["protobuf/conv_autotuning.proto"], - cc_api_version = 2, - make_default_target_header_only = True, - protodeps = [ - "//tensorflow/stream_executor:dnn_proto", - ], + actual = "//tensorflow/core/protobuf:conv_autotuning_proto", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "worker_proto", - srcs = ["protobuf/worker.proto"], - cc_api_version = 2, - protodeps = tf_additional_all_protos(), - visibility = ["//visibility:public"], -) - -tf_proto_library_cc( - name = "worker_service_proto", - srcs = ["protobuf/worker_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_stubby_versions = ["2"], - protodeps = [":worker_proto"], +alias( + name = "conv_autotuning_proto_cc", + actual = "//tensorflow/core/protobuf:conv_autotuning_proto_cc", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "master_proto", - srcs = ["protobuf/master.proto"], - cc_api_version = 2, - protodeps = tf_additional_all_protos(), - visibility = ["//tensorflow:internal"], -) - -tf_proto_library_cc( - name = "master_service_proto", - srcs = ["protobuf/master_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_stubby_versions = ["2"], - protodeps = [":master_proto"], 
+alias( + name = "worker_proto_cc", + actual = "//tensorflow/core/protobuf:worker_proto_cc", visibility = [ "//tensorflow:internal", ], ) -tf_proto_library_cc( - name = "eager_service_proto", - srcs = ["protobuf/eager_service.proto"], - has_services = 1, - cc_api_version = 2, - cc_grpc_version = 1, - cc_stubby_versions = ["2"], - protodeps = tf_additional_all_protos(), +alias( + name = "worker_service_proto_cc", + actual = "//tensorflow/core/protobuf:worker_service_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( + name = "master_proto_cc", + actual = "//tensorflow/core/protobuf:master_proto_cc", + visibility = [ + "//learning/brain/frameworks/uptc:__subpackages__", + "//tensorflow:internal", + ], +) + +alias( + name = "master_service_proto_cc", + actual = "//tensorflow/core/protobuf:master_service_proto_cc", + visibility = [ + "//tensorflow:internal", + ], +) + +alias( + name = "eager_service_proto_cc", + actual = "//tensorflow/core/protobuf:eager_service_proto_cc", visibility = [ "//tensorflow:internal", ], @@ -2140,49 +2101,14 @@ cc_library( ], ) -tf_proto_library( +alias( name = "error_codes_proto_impl", - srcs = ["protobuf/error_codes.proto"], - cc_api_version = 2, - make_default_target_header_only = True, + actual = "//tensorflow/core/protobuf:error_codes_proto_impl", ) -tf_proto_library( - name = "core_protos", - srcs = COMMON_PROTO_SRCS + [ - # Protos which are not needed on mobile builds, but should be included - # in protos_all. - # - # Note that some protos are in neither core_proto_srcs nor this - # filegroup; e.g. ones with individual proto_library targets. - "protobuf/control_flow.proto", - # TODO(ebrevdo): Re-enable once CriticalSection is in core. - # "protobuf/critical_section.proto", - "protobuf/data/experimental/snapshot.proto", - "protobuf/debug_event.proto", - "protobuf/meta_graph.proto", - "protobuf/named_tensor.proto", - "protobuf/remote_tensor_handle.proto", - "protobuf/saved_model.proto", - "protobuf/saved_object_graph.proto", - "protobuf/struct.proto", - "protobuf/tensorflow_server.proto", - "protobuf/trackable_object_graph.proto", - "protobuf/transport_options.proto", - ], - cc_api_version = 2, - make_default_target_header_only = True, - protodeps = [ - ":error_codes_proto_impl", - "//tensorflow/core/example:protos_all", - "//tensorflow/core/framework:protos_all", - "//tensorflow/core/lib/core:error_codes_proto", - "//tensorflow/core/profiler/protobuf:xplane_proto", - "//tensorflow/core/profiler:profiler_options_proto", - "//tensorflow/core/util:protos_all", - "//tensorflow/core/util:test_log_proto_impl", - ], - visibility = ["//visibility:private"], +alias( + name = "error_codes_proto_impl_cc", + actual = "//tensorflow/core/protobuf:error_codes_proto_impl_cc", ) alias( @@ -2391,10 +2317,6 @@ alias( # Library containing all of the graph construction code that is # independent of the runtime. -# -# TODO(mrry): Refactor graph_constructor.cc so that it does not depend on code -# in "common_runtime/", and then the entire "graph/" directory can be included -# in this library. 
tf_cuda_library( name = "graph", srcs = ["//tensorflow/core/graph:graph_srcs"], @@ -2478,13 +2400,9 @@ alias( visibility = ["//visibility:public"], ) -tf_proto_library_cc( - name = "replay_log_proto", - srcs = ["protobuf/replay_log.proto"], - cc_api_version = 2, - protodeps = [ - ":master_proto", - ] + tf_additional_all_protos(), +alias( + name = "replay_log_proto_cc", + actual = "//tensorflow/core/protobuf:replay_log_proto_cc", visibility = [ "//tensorflow:internal", ], @@ -2740,42 +2658,6 @@ tf_cc_tests( ], ) -tf_cc_tests( - name = "higher_level_tests_needing_kernels", - size = "small", - srcs = [ - "//tensorflow/core/graph:higher_level_tests_needing_kernels", - ], - linkopts = select({ - "//tensorflow:macos": ["-headerpad_max_install_names"], - "//conditions:default": [], - }), - linkstatic = tf_kernel_tests_linkstatic(), - deps = [ - ":all_kernels", - ":core", - ":core_cpu", - ":core_cpu_internal", - ":direct_session_internal", - ":framework", - ":framework_internal", - ":lib", - ":lib_internal", - ":ops", - ":protos_all_cc", - ":test", - ":test_main", - ":testlib", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:cc_ops_internal", - "//tensorflow/cc:scope", - "//tensorflow/cc:sendrecv_ops", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/util:protos_test_cc", - "//third_party/eigen3", - ], -) - tf_cc_test( name = "cudnn_rnn_ops_test_cc", size = "small", @@ -3151,6 +3033,11 @@ alias( actual = "//tensorflow/core/platform:cuda_libdevice_path", ) +# Normalize CORE_PROTO_SRCS to generate valid output file names. +PORTABLE_PROTO_HEADERS_OUT = tf_android_core_proto_headers(CORE_PROTO_SRCS) + [ + "//google/protobuf/any.proto.h", +] + transitive_hdrs( name = "headers", visibility = ["//tensorflow:__subpackages__"], @@ -3163,8 +3050,3 @@ transitive_hdrs( "//tensorflow/core/platform:platform_strings", ], ) - -# Normalize CORE_PROTO_SRCS to generate valid output file names. -PORTABLE_PROTO_HEADERS_OUT = tf_android_core_proto_headers(CORE_PROTO_SRCS) + [ - "//google/protobuf/any.proto.h", -] diff --git a/tensorflow/core/api_def/base_api/api_def_AdjustHue.pbtxt b/tensorflow/core/api_def/base_api/api_def_AdjustHue.pbtxt index bfaf6768601..c34b5c6fbcb 100644 --- a/tensorflow/core/api_def/base_api/api_def_AdjustHue.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_AdjustHue.pbtxt @@ -21,7 +21,7 @@ END summary: "Adjust the hue of one or more images." description: < l1 else 0.0 accum = accum_new diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt index 3218ab7776c..1eb33005e91 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyFtrlV2.pbtxt @@ -65,8 +65,8 @@ END summary: "Update \'*var\' according to the Ftrl-proximal scheme." description: < l1 else 0.0 diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt index 2bbaba26257..84382d8a99c 100644 --- a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestFeatureSplitV2.pbtxt @@ -47,7 +47,7 @@ END in_arg { name: "min_node_weight" description: <